From f83644ffd5f86b82ec33f97e6e0e360157f59e7c Mon Sep 17 00:00:00 2001 From: Brian Harring Date: Fri, 21 Feb 2014 14:23:21 +0000 Subject: Drop the cvs2svn libs; no longer needed/used --- cvs2svn_lib/__init__.py | 18 - cvs2svn_lib/apple_single_filter.py | 292 ---- cvs2svn_lib/artifact.py | 59 - cvs2svn_lib/artifact_manager.py | 256 ---- cvs2svn_lib/bzr_run_options.py | 175 --- cvs2svn_lib/changeset.py | 269 ---- cvs2svn_lib/changeset_database.py | 70 - cvs2svn_lib/changeset_graph.py | 456 ------ cvs2svn_lib/changeset_graph_link.py | 149 -- cvs2svn_lib/changeset_graph_node.py | 50 - cvs2svn_lib/check_dependencies_pass.py | 144 -- cvs2svn_lib/checkout_internal.py | 778 ----------- cvs2svn_lib/collect_data.py | 1431 ------------------- cvs2svn_lib/common.py | 409 ------ cvs2svn_lib/config.py | 221 --- cvs2svn_lib/context.py | 93 -- cvs2svn_lib/cvs_file.py | 287 ---- cvs2svn_lib/cvs_file_database.py | 75 - cvs2svn_lib/cvs_file_items.py | 1075 --------------- cvs2svn_lib/cvs_item.py | 901 ------------ cvs2svn_lib/cvs_item_database.py | 248 ---- cvs2svn_lib/cvs_revision_manager.py | 85 -- cvs2svn_lib/database.py | 322 ----- cvs2svn_lib/dumpfile_delegate.py | 510 ------- cvs2svn_lib/fill_source.py | 192 --- cvs2svn_lib/fulltext_revision_recorder.py | 127 -- cvs2svn_lib/git_output_option.py | 658 --------- cvs2svn_lib/git_revision_recorder.py | 114 -- cvs2svn_lib/git_run_options.py | 274 ---- cvs2svn_lib/key_generator.py | 45 - cvs2svn_lib/log.py | 174 --- cvs2svn_lib/main.py | 117 -- cvs2svn_lib/man_writer.py | 197 --- cvs2svn_lib/metadata.py | 26 - cvs2svn_lib/metadata_database.py | 102 -- cvs2svn_lib/openings_closings.py | 236 ---- cvs2svn_lib/output_option.py | 85 -- cvs2svn_lib/pass_manager.py | 215 --- cvs2svn_lib/passes.py | 1837 ------------------------- cvs2svn_lib/persistence_manager.py | 106 -- cvs2svn_lib/process.py | 116 -- cvs2svn_lib/project.py | 219 --- cvs2svn_lib/property_setters.py | 385 ------ cvs2svn_lib/rcs_revision_manager.py | 51 - cvs2svn_lib/rcs_stream.py | 149 -- cvs2svn_lib/record_table.py | 399 ------ cvs2svn_lib/repository_delegate.py | 98 -- cvs2svn_lib/repository_mirror.py | 897 ------------ cvs2svn_lib/revision_manager.py | 189 --- cvs2svn_lib/run_options.py | 1035 -------------- cvs2svn_lib/serializer.py | 146 -- cvs2svn_lib/stats_keeper.py | 189 --- cvs2svn_lib/stdout_delegate.py | 107 -- cvs2svn_lib/svn_commit.py | 381 ----- cvs2svn_lib/svn_commit_creator.py | 217 --- cvs2svn_lib/svn_commit_item.py | 50 - cvs2svn_lib/svn_output_option.py | 753 ---------- cvs2svn_lib/svn_repository_delegate.py | 121 -- cvs2svn_lib/svn_revision_range.py | 171 --- cvs2svn_lib/svn_run_options.py | 543 -------- cvs2svn_lib/symbol.py | 246 ---- cvs2svn_lib/symbol_database.py | 68 - cvs2svn_lib/symbol_statistics.py | 521 ------- cvs2svn_lib/symbol_strategy.py | 685 --------- cvs2svn_lib/symbol_transform.py | 236 ---- cvs2svn_lib/time_range.py | 44 - cvs2svn_lib/version.py | 27 - cvs2svn_rcsparse/__init__.py | 26 - cvs2svn_rcsparse/common.py | 324 ----- cvs2svn_rcsparse/debug.py | 122 -- cvs2svn_rcsparse/default.py | 172 --- cvs2svn_rcsparse/parse_rcs_file.py | 73 - cvs2svn_rcsparse/rcparse_redundant_work.patch | 99 -- cvs2svn_rcsparse/run-tests.py | 73 - cvs2svn_rcsparse/texttools.py | 348 ----- 75 files changed, 22158 deletions(-) delete mode 100644 cvs2svn_lib/__init__.py delete mode 100644 cvs2svn_lib/apple_single_filter.py delete mode 100644 cvs2svn_lib/artifact.py delete mode 100644 cvs2svn_lib/artifact_manager.py delete mode 100644 cvs2svn_lib/bzr_run_options.py delete mode 100644 cvs2svn_lib/changeset.py delete mode 100644 cvs2svn_lib/changeset_database.py delete mode 100644 cvs2svn_lib/changeset_graph.py delete mode 100644 cvs2svn_lib/changeset_graph_link.py delete mode 100644 cvs2svn_lib/changeset_graph_node.py delete mode 100644 cvs2svn_lib/check_dependencies_pass.py delete mode 100644 cvs2svn_lib/checkout_internal.py delete mode 100644 cvs2svn_lib/collect_data.py delete mode 100644 cvs2svn_lib/common.py delete mode 100644 cvs2svn_lib/config.py delete mode 100644 cvs2svn_lib/context.py delete mode 100644 cvs2svn_lib/cvs_file.py delete mode 100644 cvs2svn_lib/cvs_file_database.py delete mode 100644 cvs2svn_lib/cvs_file_items.py delete mode 100644 cvs2svn_lib/cvs_item.py delete mode 100644 cvs2svn_lib/cvs_item_database.py delete mode 100644 cvs2svn_lib/cvs_revision_manager.py delete mode 100644 cvs2svn_lib/database.py delete mode 100644 cvs2svn_lib/dumpfile_delegate.py delete mode 100644 cvs2svn_lib/fill_source.py delete mode 100644 cvs2svn_lib/fulltext_revision_recorder.py delete mode 100644 cvs2svn_lib/git_output_option.py delete mode 100644 cvs2svn_lib/git_revision_recorder.py delete mode 100644 cvs2svn_lib/git_run_options.py delete mode 100644 cvs2svn_lib/key_generator.py delete mode 100644 cvs2svn_lib/log.py delete mode 100644 cvs2svn_lib/main.py delete mode 100644 cvs2svn_lib/man_writer.py delete mode 100644 cvs2svn_lib/metadata.py delete mode 100644 cvs2svn_lib/metadata_database.py delete mode 100644 cvs2svn_lib/openings_closings.py delete mode 100644 cvs2svn_lib/output_option.py delete mode 100644 cvs2svn_lib/pass_manager.py delete mode 100644 cvs2svn_lib/passes.py delete mode 100644 cvs2svn_lib/persistence_manager.py delete mode 100644 cvs2svn_lib/process.py delete mode 100644 cvs2svn_lib/project.py delete mode 100644 cvs2svn_lib/property_setters.py delete mode 100644 cvs2svn_lib/rcs_revision_manager.py delete mode 100644 cvs2svn_lib/rcs_stream.py delete mode 100644 cvs2svn_lib/record_table.py delete mode 100644 cvs2svn_lib/repository_delegate.py delete mode 100644 cvs2svn_lib/repository_mirror.py delete mode 100644 cvs2svn_lib/revision_manager.py delete mode 100644 cvs2svn_lib/run_options.py delete mode 100644 cvs2svn_lib/serializer.py delete mode 100644 cvs2svn_lib/stats_keeper.py delete mode 100644 cvs2svn_lib/stdout_delegate.py delete mode 100644 cvs2svn_lib/svn_commit.py delete mode 100644 cvs2svn_lib/svn_commit_creator.py delete mode 100644 cvs2svn_lib/svn_commit_item.py delete mode 100644 cvs2svn_lib/svn_output_option.py delete mode 100644 cvs2svn_lib/svn_repository_delegate.py delete mode 100644 cvs2svn_lib/svn_revision_range.py delete mode 100644 cvs2svn_lib/svn_run_options.py delete mode 100644 cvs2svn_lib/symbol.py delete mode 100644 cvs2svn_lib/symbol_database.py delete mode 100644 cvs2svn_lib/symbol_statistics.py delete mode 100644 cvs2svn_lib/symbol_strategy.py delete mode 100644 cvs2svn_lib/symbol_transform.py delete mode 100644 cvs2svn_lib/time_range.py delete mode 100644 cvs2svn_lib/version.py delete mode 100644 cvs2svn_rcsparse/__init__.py delete mode 100644 cvs2svn_rcsparse/common.py delete mode 100644 cvs2svn_rcsparse/debug.py delete mode 100644 cvs2svn_rcsparse/default.py delete mode 100644 cvs2svn_rcsparse/parse_rcs_file.py delete mode 100644 cvs2svn_rcsparse/rcparse_redundant_work.patch delete mode 100644 cvs2svn_rcsparse/run-tests.py delete mode 100644 cvs2svn_rcsparse/texttools.py diff --git a/cvs2svn_lib/__init__.py b/cvs2svn_lib/__init__.py deleted file mode 100644 index 838d4c6..0000000 --- a/cvs2svn_lib/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This package contains modules that support cvs2svn.""" - diff --git a/cvs2svn_lib/apple_single_filter.py b/cvs2svn_lib/apple_single_filter.py deleted file mode 100644 index 95fa9cb..0000000 --- a/cvs2svn_lib/apple_single_filter.py +++ /dev/null @@ -1,292 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2007-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""A stream filter for extracting the data fork from AppleSingle data. - -Some Macintosh CVS clients store resource fork data along with the -contents of the file (called the data fork) by encoding both in an -'AppleSingle' data stream before storing them to CVS. This file -contains a stream filter for extracting the data fork from such data -streams. (Any other forks are discarded.) - -See the following for some random information about this format and -how it is used by Macintosh CVS clients: - - http://users.phg-online.de/tk/netatalk/doc/Apple/v1/ - http://rfc.net/rfc1740.html - http://ximbiot.com/cvs/cvshome/cyclic/cvs/dev-mac.html - http://www.maccvs.org/faq.html#resfiles - http://www.heilancoo.net/MacCVSClient/MacCVSClientDoc/storage-formats.html - -""" - - -import struct -from cStringIO import StringIO - - -class AppleSingleFormatError(IOError): - """The stream was not in correct AppleSingle format.""" - - pass - - -class AppleSingleIncorrectMagicError(AppleSingleFormatError): - """The file didn't start with the correct magic number.""" - - def __init__(self, data_read, eof): - AppleSingleFormatError.__init__(self) - self.data_read = data_read - self.eof = eof - - -class AppleSingleEOFError(AppleSingleFormatError): - """EOF was reached where AppleSingle doesn't allow it.""" - - pass - - -class AppleSingleFilter(object): - """A stream that reads the data fork from an AppleSingle stream. - - If the constructor discovers that the file is not a legitimate - AppleSingle stream, then it raises an AppleSingleFormatError. In - the special case that the magic number is incorrect, it raises - AppleSingleIncorrectMagicError with data_read set to the data that - have been read so far from the input stream. (This allows the - caller the option to fallback to treating the input stream as a - normal binary data stream.)""" - - # The header is: - # - # Magic number 4 bytes - # Version number 4 bytes - # File system or filler 16 bytes - # Number of entries 2 bytes - magic_struct = '>i' - magic_len = struct.calcsize(magic_struct) - - # The part of the header after the magic number: - rest_of_header_struct = '>i16sH' - rest_of_header_len = struct.calcsize(rest_of_header_struct) - - # Each entry is: - # - # Entry ID 4 bytes - # Offset 4 bytes - # Length 4 bytes - entry_struct = '>iii' - entry_len = struct.calcsize(entry_struct) - - apple_single_magic = 0x00051600 - apple_single_version_1 = 0x00010000 - apple_single_version_2 = 0x00020000 - apple_single_filler = '\0' * 16 - - apple_single_data_fork_entry_id = 1 - - def __init__(self, stream): - self.stream = stream - - # Check for the AppleSingle magic number: - s = self._read_exactly(self.magic_len) - if len(s) < self.magic_len: - raise AppleSingleIncorrectMagicError(s, True) - - (magic,) = struct.unpack(self.magic_struct, s) - if magic != self.apple_single_magic: - raise AppleSingleIncorrectMagicError(s, False) - - # Read the rest of the header: - s = self._read_exactly(self.rest_of_header_len) - if len(s) < self.rest_of_header_len: - raise AppleSingleEOFError('AppleSingle header incomplete') - - (version, filler, num_entries) = \ - struct.unpack(self.rest_of_header_struct, s) - - if version == self.apple_single_version_1: - self._prepare_apple_single_v1_file(num_entries) - elif version == self.apple_single_version_2: - if filler != self.apple_single_filler: - raise AppleSingleFormatError('Incorrect filler') - self._prepare_apple_single_v2_file(num_entries) - else: - raise AppleSingleFormatError('Unknown AppleSingle version') - - def _read_exactly(self, size): - """Read and return exactly SIZE characters from the stream. - - This method is to deal with the fact that stream.read(size) is - allowed to return less than size characters. If EOF is reached - before SIZE characters have been read, return the characters that - have been read so far.""" - - retval = [] - length_remaining = size - while length_remaining > 0: - s = self.stream.read(length_remaining) - if not s: - break - retval.append(s) - length_remaining -= len(s) - - return ''.join(retval) - - def _prepare_apple_single_file(self, num_entries): - entries = self._read_exactly(num_entries * self.entry_len) - if len(entries) < num_entries * self.entry_len: - raise AppleSingleEOFError('Incomplete entries list') - - for i in range(num_entries): - entry = entries[i * self.entry_len : (i + 1) * self.entry_len] - (entry_id, offset, length) = struct.unpack(self.entry_struct, entry) - if entry_id == self.apple_single_data_fork_entry_id: - break - else: - raise AppleSingleFormatError('No data fork found') - - # The data fork is located at [offset : offset + length]. Read up - # to the start of the data: - n = offset - self.magic_len - self.rest_of_header_len - len(entries) - if n < 0: - raise AppleSingleFormatError('Invalid offset to AppleSingle data fork') - - max_chunk_size = 65536 - while n > 0: - s = self.stream.read(min(n, max_chunk_size)) - if not s: - raise AppleSingleEOFError( - 'Offset to AppleSingle data fork past end of file' - ) - n -= len(s) - - self.length_remaining = length - - def _prepare_apple_single_v1_file(self, num_entries): - self._prepare_apple_single_file(num_entries) - - def _prepare_apple_single_v2_file(self, num_entries): - self._prepare_apple_single_file(num_entries) - - def read(self, size=-1): - if size == 0 or self.length_remaining == 0: - return '' - elif size < 0: - s = self._read_exactly(self.length_remaining) - if len(s) < self.length_remaining: - raise AppleSingleEOFError('AppleSingle data fork truncated') - self.length_remaining = 0 - return s - else: - # The length of this read is allowed to be shorter than the - # requested size: - s = self.stream.read(min(size, self.length_remaining)) - if not s: - raise AppleSingleEOFError() - self.length_remaining -= len(s) - return s - - def close(self): - self.stream.close() - self.stream = None - - -class CompoundStream(object): - """A stream that reads from a series of streams, one after the other.""" - - def __init__(self, *streams): - self.streams = list(streams) - self.stream_index = 0 - - def read(self, size=-1): - if size < 0: - retval = [] - while self.stream_index < len(self.streams): - retval.append(self.streams[self.stream_index].read()) - self.stream_index += 1 - return ''.join(retval) - else: - while self.stream_index < len(self.streams): - s = self.streams[self.stream_index].read(size) - if s: - # This may not be the full size requested, but that is OK: - return s - else: - # That stream was empty; proceed to the next stream: - self.stream_index += 1 - - # No streams are left: - return '' - - def close(self): - for stream in self.streams: - stream.close() - self.streams = None - - -def get_maybe_apple_single_stream(stream): - """Treat STREAM as AppleSingle if possible; otherwise treat it literally. - - If STREAM is in AppleSingle format, then return a stream that will - output the data fork of the original stream. Otherwise, return a - stream that will output the original file contents literally. - - Be careful not to read from STREAM after it has already hit EOF.""" - - try: - return AppleSingleFilter(stream) - except AppleSingleIncorrectMagicError, e: - # This is OK; the file is not AppleSingle, so we read it normally: - string_io = StringIO(e.data_read) - if e.eof: - # The original stream already reached EOF, so the part already - # read contains the complete file contents: - return string_io - else: - # The stream needs to output the part already read followed by - # whatever hasn't been read of the original stream: - return CompoundStream(string_io, stream) - - -if __name__ == '__main__': - # For fun and testing, allow use of this file as a pipe if it is - # invoked as a script. Specifically, if stdin is in AppleSingle - # format, then output only its data fork; otherwise, output it - # unchanged. - # - # This might not work on systems where sys.stdin is opened in text - # mode. - # - # Remember to set PYTHONPATH to point to the main cvs2svn directory. - - import sys - - #CHUNK_SIZE = -1 - CHUNK_SIZE = 100 - - f = get_maybe_apple_single_stream(sys.stdin) - - if CHUNK_SIZE < 0: - sys.stdout.write(f.read()) - else: - while True: - s = f.read(CHUNK_SIZE) - if not s: - break - sys.stdout.write(s) - - diff --git a/cvs2svn_lib/artifact.py b/cvs2svn_lib/artifact.py deleted file mode 100644 index 99d6945..0000000 --- a/cvs2svn_lib/artifact.py +++ /dev/null @@ -1,59 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module defines Artifact types to be used with an ArtifactManager.""" - - -import os - -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.log import Log - - -class Artifact(object): - """An object that is created, used across passes, then cleaned up.""" - - def __init__(self): - # The set of passes that need this artifact. This field is - # maintained by ArtifactManager. - self._passes_needed = set() - - def cleanup(self): - """This artifact is no longer needed; clean it up.""" - - pass - - -class TempFile(Artifact): - """A temporary file that can be used across cvs2svn passes.""" - - def __init__(self, basename): - Artifact.__init__(self) - self.basename = basename - - def _get_filename(self): - return Ctx().get_temp_filename(self.basename) - - filename = property(_get_filename) - - def cleanup(self): - Log().verbose("Deleting", self.filename) - os.unlink(self.filename) - - def __str__(self): - return 'Temporary file %r' % (self.filename,) - - diff --git a/cvs2svn_lib/artifact_manager.py b/cvs2svn_lib/artifact_manager.py deleted file mode 100644 index 08f0ec7..0000000 --- a/cvs2svn_lib/artifact_manager.py +++ /dev/null @@ -1,256 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module manages the artifacts produced by conversion passes.""" - - -from cvs2svn_lib.log import Log -from cvs2svn_lib.artifact import TempFile - - -class ArtifactNotActiveError(Exception): - """An artifact was requested when no passes that have registered - that they need it are active.""" - - def __init__(self, artifact_name): - Exception.__init__( - self, 'Artifact %s is not currently active' % artifact_name) - - -class ArtifactManager: - """Manage artifacts that are created by one pass but needed by others. - - This class is responsible for cleaning up artifacts once they are no - longer needed. The trick is that cvs2svn can be run pass by pass, - so not all passes might be executed during a specific program run. - - To use this class: - - - Call artifact_manager.set_artifact(name, artifact) once for each - known artifact. - - - Call artifact_manager.creates(which_pass, artifact) to indicate - that WHICH_PASS is the pass that creates ARTIFACT. - - - Call artifact_manager.uses(which_pass, artifact) to indicate that - WHICH_PASS needs to use ARTIFACT. - - There are also helper methods register_temp_file(), - register_artifact_needed(), and register_temp_file_needed() which - combine some useful operations. - - Then, in pass order: - - - Call pass_skipped() for any passes that were already executed - during a previous cvs2svn run. - - - Call pass_started() when a pass is about to start execution. - - - If a pass that has been started will be continued during the next - program run, then call pass_continued(). - - - If a pass that has been started finishes execution, call - pass_done(), to allow any artifacts that won't be needed anymore - to be cleaned up. - - - Call pass_deferred() for any passes that have been deferred to a - future cvs2svn run. - - Finally: - - - Call check_clean() to verify that all artifacts have been - accounted for.""" - - def __init__(self): - # A map { artifact_name : artifact } of known artifacts. - self._artifacts = { } - - # A map { pass : set_of_artifacts }, where set_of_artifacts is a - # set of artifacts needed by the pass. - self._pass_needs = { } - - # A set of passes that are currently being executed. - self._active_passes = set() - - def set_artifact(self, name, artifact): - """Add ARTIFACT to the list of artifacts that we manage. - - Store it under NAME.""" - - assert name not in self._artifacts - self._artifacts[name] = artifact - - def get_artifact(self, name): - """Return the artifact with the specified name. - - If the artifact does not currently exist, raise a KeyError. If it - is not registered as being needed by one of the active passes, - raise an ArtifactNotActiveError.""" - - artifact = self._artifacts[name] - for active_pass in self._active_passes: - if artifact in self._pass_needs[active_pass]: - # OK - return artifact - else: - raise ArtifactNotActiveError(name) - - def creates(self, which_pass, artifact): - """Register that WHICH_PASS creates ARTIFACT. - - ARTIFACT must already have been registered.""" - - # An artifact is automatically "needed" in the pass in which it is - # created: - self.uses(which_pass, artifact) - - def uses(self, which_pass, artifact): - """Register that WHICH_PASS uses ARTIFACT. - - ARTIFACT must already have been registered.""" - - artifact._passes_needed.add(which_pass) - if which_pass in self._pass_needs: - self._pass_needs[which_pass].add(artifact) - else: - self._pass_needs[which_pass] = set([artifact]) - - def register_temp_file(self, basename, which_pass): - """Register a temporary file with base name BASENAME as an artifact. - - Return the filename of the temporary file.""" - - artifact = TempFile(basename) - self.set_artifact(basename, artifact) - self.creates(which_pass, artifact) - - def get_temp_file(self, basename): - """Return the filename of the temporary file with the specified BASENAME. - - If the temporary file is not an existing, registered TempFile, - raise a KeyError.""" - - return self.get_artifact(basename).filename - - def register_artifact_needed(self, artifact_name, which_pass): - """Register that WHICH_PASS uses the artifact named ARTIFACT_NAME. - - An artifact with this name must already have been registered.""" - - artifact = self._artifacts[artifact_name] - artifact._passes_needed.add(which_pass) - if which_pass in self._pass_needs: - self._pass_needs[which_pass].add(artifact) - else: - self._pass_needs[which_pass] = set([artifact,]) - - def register_temp_file_needed(self, basename, which_pass): - """Register that a temporary file is needed by WHICH_PASS. - - Register that the temporary file with base name BASENAME is needed - by WHICH_PASS.""" - - self.register_artifact_needed(basename, which_pass) - - def _unregister_artifacts(self, which_pass): - """Unregister any artifacts that were needed for WHICH_PASS. - - Return a list of artifacts that are no longer needed at all.""" - - try: - artifacts = list(self._pass_needs[which_pass]) - except KeyError: - # No artifacts were needed for that pass: - return [] - - del self._pass_needs[which_pass] - - unneeded_artifacts = [] - for artifact in artifacts: - artifact._passes_needed.remove(which_pass) - if not artifact._passes_needed: - unneeded_artifacts.append(artifact) - - return unneeded_artifacts - - def pass_skipped(self, which_pass): - """WHICH_PASS was executed during a previous cvs2svn run. - - Its artifacts were created then, and any artifacts that would - normally be cleaned up after this pass have already been cleaned - up.""" - - self._unregister_artifacts(which_pass) - - def pass_started(self, which_pass): - """WHICH_PASS is starting.""" - - self._active_passes.add(which_pass) - - def pass_continued(self, which_pass): - """WHICH_PASS will be continued during the next program run. - - WHICH_PASS, which has already been started, will be continued - during the next program run. Unregister any artifacts that would - be cleaned up at the end of WHICH_PASS without actually cleaning - them up.""" - - self._active_passes.remove(which_pass) - self._unregister_artifacts(which_pass) - - def pass_done(self, which_pass, skip_cleanup): - """WHICH_PASS is done. - - Clean up all artifacts that are no longer needed. If SKIP_CLEANUP - is True, then just do the bookkeeping without actually calling - artifact.cleanup().""" - - self._active_passes.remove(which_pass) - artifacts = self._unregister_artifacts(which_pass) - if not skip_cleanup: - for artifact in artifacts: - artifact.cleanup() - - def pass_deferred(self, which_pass): - """WHICH_PASS is being deferred until a future cvs2svn run. - - Unregister any artifacts that would be cleaned up during - WHICH_PASS.""" - - self._unregister_artifacts(which_pass) - - def check_clean(self): - """All passes have been processed. - - Output a warning messages if all artifacts have not been accounted - for. (This is mainly a consistency check, that no artifacts were - registered under nonexistent passes.)""" - - unclean_artifacts = [ - str(artifact) - for artifact in self._artifacts.values() - if artifact._passes_needed] - - if unclean_artifacts: - Log().warn( - 'INTERNAL: The following artifacts were not cleaned up:\n %s\n' - % ('\n '.join(unclean_artifacts))) - - -# The default ArtifactManager instance: -artifact_manager = ArtifactManager() - - diff --git a/cvs2svn_lib/bzr_run_options.py b/cvs2svn_lib/bzr_run_options.py deleted file mode 100644 index 5332dff..0000000 --- a/cvs2svn_lib/bzr_run_options.py +++ /dev/null @@ -1,175 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module manages cvs2bzr run options.""" - - -import sys -import datetime -import codecs - -from cvs2svn_lib.version import VERSION -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.run_options import not_both -from cvs2svn_lib.run_options import RunOptions -from cvs2svn_lib.run_options import ContextOption -from cvs2svn_lib.run_options import IncompatibleOption -from cvs2svn_lib.run_options import authors -from cvs2svn_lib.man_writer import ManWriter -from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader -from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader -from cvs2svn_lib.git_run_options import GitRunOptions -from cvs2svn_lib.git_output_option import GitRevisionInlineWriter -from cvs2svn_lib.git_output_option import GitOutputOption -from cvs2svn_lib.revision_manager import NullRevisionRecorder -from cvs2svn_lib.revision_manager import NullRevisionExcluder - - -short_desc = 'convert a cvs repository into a Bazaar repository' - -synopsis = """\ -.B cvs2bzr -[\\fIOPTION\\fR]... \\fIOUTPUT-OPTIONS CVS-REPOS-PATH\\fR -.br -.B cvs2bzr -[\\fIOPTION\\fR]... \\fI--options=PATH\\fR -""" - -description="""\ -Convert a CVS repository into a Bazaar repository, including history. - -""" -long_desc = """\ -Create a new Bazaar repository based on the version history stored in a -CVS repository. Each CVS commit will be mirrored in the Bazaar -repository, including such information as date of commit and id of the -committer. -.P -The output of this program is a "fast-import dumpfile", which -can be loaded into a Bazaar repository using the Bazaar FastImport -Plugin, available from https://launchpad.net/bzr-fastimport. - -.P -\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS -repository that you want to convert. This path doesn't have to be the -top level directory of a CVS repository; it can point at a project -within a repository, in which case only that project will be -converted. This path or one of its parent directories has to contain -a subdirectory called CVSROOT (though the CVSROOT directory can be -empty). -.P -It is not possible directly to convert a CVS repository to which you -only have remote access, but the FAQ describes tools that may be used -to create a local copy of a remote CVS repository. -""" - -files = """\ -A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by -\\fB--tmpdir\\fR) is used as scratch space for temporary data files. -""" - -see_also = [ - ('cvs', '1'), - ('bzr', '1'), - ] - - -class BzrRunOptions(GitRunOptions): - - def get_description(self): - return description - - def _get_output_options_group(self): - group = RunOptions._get_output_options_group(self) - - group.add_option(IncompatibleOption( - '--dumpfile', type='string', - action='store', - help='path to which the data should be written', - man_help=( - 'Write the blobs and revision data to \\fIpath\\fR.' - ), - metavar='PATH', - )) - group.add_option(ContextOption( - '--dry-run', - action='store_true', - help=( - 'do not create any output; just print what would happen.' - ), - man_help=( - 'Do not create any output; just print what would happen.' - ), - )) - - return group - - def callback_manpage(self, option, opt_str, value, parser): - f = codecs.getwriter('utf_8')(sys.stdout) - ManWriter( - parser, - section='1', - date=datetime.date.today(), - source='Version %s' % (VERSION,), - manual='User Commands', - short_desc=short_desc, - synopsis=synopsis, - long_desc=long_desc, - files=files, - authors=authors, - see_also=see_also, - ).write_manpage(f) - sys.exit(0) - - def process_io_options(self): - """Process input/output options. - - Process options related to extracting data from the CVS repository - and writing to a Bazaar-friendly fast-import file.""" - - ctx = Ctx() - options = self.options - - not_both(options.use_rcs, '--use-rcs', - options.use_cvs, '--use-cvs') - - if options.use_rcs: - revision_reader = RCSRevisionReader( - co_executable=options.co_executable - ) - else: - # --use-cvs is the default: - revision_reader = CVSRevisionReader( - cvs_executable=options.cvs_executable - ) - - if not ctx.dry_run and not options.dumpfile: - raise FatalError("must pass '--dry-run' or '--dumpfile' option.") - - ctx.revision_recorder = NullRevisionRecorder() - ctx.revision_excluder = NullRevisionExcluder() - ctx.revision_reader = None - - ctx.output_option = GitOutputOption( - options.dumpfile, - GitRevisionInlineWriter(revision_reader), - max_merges=None, - # Optional map from CVS author names to bzr author names: - author_transforms={}, # FIXME - ) - - diff --git a/cvs2svn_lib/changeset.py b/cvs2svn_lib/changeset.py deleted file mode 100644 index 1022e0a..0000000 --- a/cvs2svn_lib/changeset.py +++ /dev/null @@ -1,269 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2006-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Manage change sets.""" - - -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.symbol import Branch -from cvs2svn_lib.symbol import Tag -from cvs2svn_lib.time_range import TimeRange -from cvs2svn_lib.changeset_graph_node import ChangesetGraphNode - - -class Changeset(object): - """A set of cvs_items that might potentially form a single change set.""" - - def __init__(self, id, cvs_item_ids): - self.id = id - self.cvs_item_ids = list(cvs_item_ids) - - def iter_cvs_items(self): - """Yield the CVSItems within this Changeset.""" - - for (id, cvs_item) in Ctx()._cvs_items_db.get_many(self.cvs_item_ids): - assert cvs_item is not None - yield cvs_item - - def get_projects_opened(self): - """Return the set of projects that might be opened by this changeset.""" - - raise NotImplementedError() - - def create_graph_node(self, cvs_item_to_changeset_id): - """Return a ChangesetGraphNode for this Changeset.""" - - raise NotImplementedError() - - def create_split_changeset(self, id, cvs_item_ids): - """Return a Changeset with the specified contents. - - This method is only implemented for changesets that can be split. - The type of the new changeset should be the same as that of SELF, - and any other information from SELF should also be copied to the - new changeset.""" - - raise NotImplementedError() - - def __getstate__(self): - return (self.id, self.cvs_item_ids,) - - def __setstate__(self, state): - (self.id, self.cvs_item_ids,) = state - - def __cmp__(self, other): - raise NotImplementedError() - - def __str__(self): - raise NotImplementedError() - - def __repr__(self): - return '%s [%s]' % ( - self, ', '.join(['%x' % id for id in self.cvs_item_ids]),) - - -class RevisionChangeset(Changeset): - """A Changeset consisting of CVSRevisions.""" - - _sort_order = 3 - - def create_graph_node(self, cvs_item_to_changeset_id): - time_range = TimeRange() - pred_ids = set() - succ_ids = set() - - for cvs_item in self.iter_cvs_items(): - time_range.add(cvs_item.timestamp) - - for pred_id in cvs_item.get_pred_ids(): - changeset_id = cvs_item_to_changeset_id.get(pred_id) - if changeset_id is not None: - pred_ids.add(changeset_id) - - for succ_id in cvs_item.get_succ_ids(): - changeset_id = cvs_item_to_changeset_id.get(succ_id) - if changeset_id is not None: - succ_ids.add(changeset_id) - - return ChangesetGraphNode(self, time_range, pred_ids, succ_ids) - - def create_split_changeset(self, id, cvs_item_ids): - return RevisionChangeset(id, cvs_item_ids) - - def __cmp__(self, other): - return cmp(self._sort_order, other._sort_order) \ - or cmp(self.id, other.id) - - def __str__(self): - return 'RevisionChangeset<%x>' % (self.id,) - - -class OrderedChangeset(Changeset): - """A Changeset of CVSRevisions whose preliminary order is known. - - The first changeset ordering involves only RevisionChangesets, and - results in a full ordering of RevisionChangesets (i.e., a linear - chain of dependencies with the order consistent with the - dependencies). These OrderedChangesets form the skeleton for the - full topological sort that includes SymbolChangesets as well.""" - - _sort_order = 2 - - def __init__(self, id, cvs_item_ids, ordinal, prev_id, next_id): - Changeset.__init__(self, id, cvs_item_ids) - - # The order of this changeset among all OrderedChangesets: - self.ordinal = ordinal - - # The changeset id of the previous OrderedChangeset, or None if - # this is the first OrderedChangeset: - self.prev_id = prev_id - - # The changeset id of the next OrderedChangeset, or None if this - # is the last OrderedChangeset: - self.next_id = next_id - - def get_projects_opened(self): - retval = set() - for cvs_item in self.iter_cvs_items(): - retval.add(cvs_item.cvs_file.project) - return retval - - def create_graph_node(self, cvs_item_to_changeset_id): - time_range = TimeRange() - - pred_ids = set() - succ_ids = set() - - if self.prev_id is not None: - pred_ids.add(self.prev_id) - - if self.next_id is not None: - succ_ids.add(self.next_id) - - for cvs_item in self.iter_cvs_items(): - time_range.add(cvs_item.timestamp) - - for pred_id in cvs_item.get_symbol_pred_ids(): - changeset_id = cvs_item_to_changeset_id.get(pred_id) - if changeset_id is not None: - pred_ids.add(changeset_id) - - for succ_id in cvs_item.get_symbol_succ_ids(): - changeset_id = cvs_item_to_changeset_id.get(succ_id) - if changeset_id is not None: - succ_ids.add(changeset_id) - - return ChangesetGraphNode(self, time_range, pred_ids, succ_ids) - - def __getstate__(self): - return ( - Changeset.__getstate__(self), - self.ordinal, self.prev_id, self.next_id,) - - def __setstate__(self, state): - (changeset_state, self.ordinal, self.prev_id, self.next_id,) = state - Changeset.__setstate__(self, changeset_state) - - def __cmp__(self, other): - return cmp(self._sort_order, other._sort_order) \ - or cmp(self.id, other.id) - - def __str__(self): - return 'OrderedChangeset<%x(%d)>' % (self.id, self.ordinal,) - - -class SymbolChangeset(Changeset): - """A Changeset consisting of CVSSymbols.""" - - def __init__(self, id, symbol, cvs_item_ids): - Changeset.__init__(self, id, cvs_item_ids) - self.symbol = symbol - - def get_projects_opened(self): - # A SymbolChangeset can never open a project. - return set() - - def create_graph_node(self, cvs_item_to_changeset_id): - pred_ids = set() - succ_ids = set() - - for cvs_item in self.iter_cvs_items(): - for pred_id in cvs_item.get_pred_ids(): - changeset_id = cvs_item_to_changeset_id.get(pred_id) - if changeset_id is not None: - pred_ids.add(changeset_id) - - for succ_id in cvs_item.get_succ_ids(): - changeset_id = cvs_item_to_changeset_id.get(succ_id) - if changeset_id is not None: - succ_ids.add(changeset_id) - - return ChangesetGraphNode(self, TimeRange(), pred_ids, succ_ids) - - def __cmp__(self, other): - return cmp(self._sort_order, other._sort_order) \ - or cmp(self.symbol, other.symbol) \ - or cmp(self.id, other.id) - - def __getstate__(self): - return (Changeset.__getstate__(self), self.symbol.id,) - - def __setstate__(self, state): - (changeset_state, symbol_id) = state - Changeset.__setstate__(self, changeset_state) - self.symbol = Ctx()._symbol_db.get_symbol(symbol_id) - - -class BranchChangeset(SymbolChangeset): - """A Changeset consisting of CVSBranches.""" - - _sort_order = 1 - - def create_split_changeset(self, id, cvs_item_ids): - return BranchChangeset(id, self.symbol, cvs_item_ids) - - def __str__(self): - return 'BranchChangeset<%x>("%s")' % (self.id, self.symbol,) - - -class TagChangeset(SymbolChangeset): - """A Changeset consisting of CVSTags.""" - - _sort_order = 0 - - def create_split_changeset(self, id, cvs_item_ids): - return TagChangeset(id, self.symbol, cvs_item_ids) - - def __str__(self): - return 'TagChangeset<%x>("%s")' % (self.id, self.symbol,) - - -def create_symbol_changeset(id, symbol, cvs_item_ids): - """Factory function for SymbolChangesets. - - Return a BranchChangeset or TagChangeset, depending on the type of - SYMBOL. SYMBOL must be a Branch or Tag.""" - - if isinstance(symbol, Branch): - return BranchChangeset(id, symbol, cvs_item_ids) - if isinstance(symbol, Tag): - return TagChangeset(id, symbol, cvs_item_ids) - else: - raise InternalError('Unknown symbol type %s' % (symbol,)) - - diff --git a/cvs2svn_lib/changeset_database.py b/cvs2svn_lib/changeset_database.py deleted file mode 100644 index 82ca904..0000000 --- a/cvs2svn_lib/changeset_database.py +++ /dev/null @@ -1,70 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2006-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes to store changesets.""" - - -from cvs2svn_lib.changeset import Changeset -from cvs2svn_lib.changeset import RevisionChangeset -from cvs2svn_lib.changeset import OrderedChangeset -from cvs2svn_lib.changeset import SymbolChangeset -from cvs2svn_lib.changeset import BranchChangeset -from cvs2svn_lib.changeset import TagChangeset -from cvs2svn_lib.record_table import UnsignedIntegerPacker -from cvs2svn_lib.record_table import MmapRecordTable -from cvs2svn_lib.record_table import RecordTable -from cvs2svn_lib.database import IndexedStore -from cvs2svn_lib.serializer import PrimedPickleSerializer - - -# Should the CVSItemToChangesetTable database files be memory mapped? -# This speeds up the converstion but can cause the computer's virtual -# address space to be exhausted. This option can be changed -# externally, affecting any CVSItemToChangesetTables opened subsequent -# to the change: -use_mmap_for_cvs_item_to_changeset_table = False - - -def CVSItemToChangesetTable(filename, mode): - if use_mmap_for_cvs_item_to_changeset_table: - return MmapRecordTable(filename, mode, UnsignedIntegerPacker()) - else: - return RecordTable(filename, mode, UnsignedIntegerPacker()) - - -class ChangesetDatabase(IndexedStore): - def __init__(self, filename, index_filename, mode): - primer = ( - Changeset, - RevisionChangeset, - OrderedChangeset, - SymbolChangeset, - BranchChangeset, - TagChangeset, - ) - IndexedStore.__init__( - self, filename, index_filename, mode, PrimedPickleSerializer(primer)) - - def store(self, changeset): - self.add(changeset) - - def keys(self): - return list(self.iterkeys()) - - def close(self): - IndexedStore.close(self) - - diff --git a/cvs2svn_lib/changeset_graph.py b/cvs2svn_lib/changeset_graph.py deleted file mode 100644 index 64ebf2c..0000000 --- a/cvs2svn_lib/changeset_graph.py +++ /dev/null @@ -1,456 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2006-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""The changeset dependency graph.""" - - -from cvs2svn_lib.log import Log -from cvs2svn_lib.changeset import RevisionChangeset -from cvs2svn_lib.changeset import OrderedChangeset -from cvs2svn_lib.changeset import BranchChangeset -from cvs2svn_lib.changeset import TagChangeset - - -class CycleInGraphException(Exception): - def __init__(self, cycle): - Exception.__init__( - self, - 'Cycle found in graph: %s' - % ' -> '.join(map(str, cycle + [cycle[0]]))) - - -class NoPredNodeInGraphException(Exception): - def __init__(self, node): - Exception.__init__(self, 'Node %s has no predecessors' % (node,)) - - -class _NoPredNodes: - """Manage changesets that are to be processed. - - Output the changesets in order by time and changeset type. - - The implementation of this class is crude: as changesets are added, - they are appended to a list. When one is needed, the list is sorted - in reverse order and then the last changeset in the list is - returned. To reduce the number of sorts that are needed, the class - keeps track of whether the list is currently sorted. - - All this repeated sorting is wasteful and unnecessary. We should - instead use a heap to output the changeset order, which would - require O(lg N) work per add()/get() rather than O(1) and O(N lg N) - as in the current implementation [1]. But: (1) the lame interface - of heapq doesn't allow an arbitrary compare function, so we would - have to store extra information in the array elements; (2) in - practice, the number of items in the list at any time is only a tiny - fraction of the total number of changesets; and (3) testing showed - that the heapq implementation is no faster than this one (perhaps - because of the increased memory usage). - - [1] According to Objects/listsort.txt in the Python source code, the - Python list-sorting code is heavily optimized for arrays that have - runs of already-sorted elements, so the current cost of get() is - probably closer to O(N) than O(N lg N).""" - - def __init__(self, changeset_db): - self.changeset_db = changeset_db - # A list [(node, changeset,)] of nodes with no predecessors: - self._nodes = [] - self._sorted = True - - def __len__(self): - return len(self._nodes) - - @staticmethod - def _compare((node_1, changeset_1), (node_2, changeset_2)): - """Define a (reverse) ordering on self._nodes.""" - - return cmp(node_2.time_range, node_1.time_range) \ - or cmp(changeset_2, changeset_1) - - def add(self, node): - self._nodes.append( (node, self.changeset_db[node.id],) ) - self._sorted = False - - def get(self): - """Return (node, changeset,) of the smallest node. - - 'Smallest' is defined by self._compare().""" - - if not self._sorted: - self._nodes.sort(self._compare) - self._sorted = True - return self._nodes.pop() - - -class ChangesetGraph(object): - """A graph of changesets and their dependencies.""" - - def __init__(self, changeset_db, cvs_item_to_changeset_id): - self._changeset_db = changeset_db - self._cvs_item_to_changeset_id = cvs_item_to_changeset_id - # A map { id : ChangesetGraphNode } - self.nodes = {} - - def close(self): - self._cvs_item_to_changeset_id.close() - self._cvs_item_to_changeset_id = None - self._changeset_db.close() - self._changeset_db = None - - def add_changeset(self, changeset): - """Add CHANGESET to this graph. - - Determine and record any dependencies to changesets that are - already in the graph. This method does not affect the databases.""" - - node = changeset.create_graph_node(self._cvs_item_to_changeset_id) - - # Now tie the node into our graph. If a changeset referenced by - # node is already in our graph, then add the backwards connection - # from the other node to the new one. If not, then delete the - # changeset from node. - - for pred_id in list(node.pred_ids): - pred_node = self.nodes.get(pred_id) - if pred_node is not None: - pred_node.succ_ids.add(node.id) - else: - node.pred_ids.remove(pred_id) - - for succ_id in list(node.succ_ids): - succ_node = self.nodes.get(succ_id) - if succ_node is not None: - succ_node.pred_ids.add(node.id) - else: - node.succ_ids.remove(succ_id) - - self.nodes[node.id] = node - - def store_changeset(self, changeset): - for cvs_item_id in changeset.cvs_item_ids: - self._cvs_item_to_changeset_id[cvs_item_id] = changeset.id - self._changeset_db.store(changeset) - - def add_new_changeset(self, changeset): - """Add the new CHANGESET to the graph and also to the databases.""" - - if Log().is_on(Log.DEBUG): - Log().debug('Adding changeset %r' % (changeset,)) - - self.add_changeset(changeset) - self.store_changeset(changeset) - - def delete_changeset(self, changeset): - """Remove CHANGESET from the graph and also from the databases. - - In fact, we don't remove CHANGESET from - self._cvs_item_to_changeset_id, because in practice the CVSItems - in CHANGESET are always added again as part of a new CHANGESET, - which will cause the old values to be overwritten.""" - - if Log().is_on(Log.DEBUG): - Log().debug('Removing changeset %r' % (changeset,)) - - del self[changeset.id] - del self._changeset_db[changeset.id] - - def __nonzero__(self): - """Instances are considered True iff they contain any nodes.""" - - return bool(self.nodes) - - def __contains__(self, id): - """Return True if the specified ID is contained in this graph.""" - - return id in self.nodes - - def __getitem__(self, id): - return self.nodes[id] - - def get(self, id): - return self.nodes.get(id) - - def __delitem__(self, id): - """Remove the node corresponding to ID. - - Also remove references to it from other nodes. This method does - not change pred_ids or succ_ids of the node being deleted, nor - does it affect the databases.""" - - node = self[id] - - for succ_id in node.succ_ids: - succ = self[succ_id] - succ.pred_ids.remove(node.id) - - for pred_id in node.pred_ids: - pred = self[pred_id] - pred.succ_ids.remove(node.id) - - del self.nodes[node.id] - - def keys(self): - return self.nodes.keys() - - def __iter__(self): - return self.nodes.itervalues() - - def _get_path(self, reachable_changesets, starting_node_id, ending_node_id): - """Return the shortest path from ENDING_NODE_ID to STARTING_NODE_ID. - - Find a path from ENDING_NODE_ID to STARTING_NODE_ID in - REACHABLE_CHANGESETS, where STARTING_NODE_ID is the id of a - changeset that depends on the changeset with ENDING_NODE_ID. (See - the comment in search_for_path() for a description of the format - of REACHABLE_CHANGESETS.) - - Return a list of changesets, where the 0th one has ENDING_NODE_ID - and the last one has STARTING_NODE_ID. If there is no such path - described in in REACHABLE_CHANGESETS, return None.""" - - if ending_node_id not in reachable_changesets: - return None - - path = [self._changeset_db[ending_node_id]] - id = reachable_changesets[ending_node_id][1] - while id != starting_node_id: - path.append(self._changeset_db[id]) - id = reachable_changesets[id][1] - path.append(self._changeset_db[starting_node_id]) - return path - - def search_for_path(self, starting_node_id, stop_set): - """Search for paths to prerequisites of STARTING_NODE_ID. - - Try to find the shortest dependency path that causes the changeset - with STARTING_NODE_ID to depend (directly or indirectly) on one of - the changesets whose ids are contained in STOP_SET. - - We consider direct and indirect dependencies in the sense that the - changeset can be reached by following a chain of predecessor nodes. - - When one of the changeset_ids in STOP_SET is found, terminate the - search and return the path from that changeset_id to - STARTING_NODE_ID. If no path is found to a node in STOP_SET, - return None.""" - - # A map {node_id : (steps, next_node_id)} where NODE_ID can be - # reached from STARTING_NODE_ID in STEPS steps, and NEXT_NODE_ID - # is the id of the previous node in the path. STARTING_NODE_ID is - # only included as a key if there is a loop leading back to it. - reachable_changesets = {} - - # A list of (node_id, steps) that still have to be investigated, - # and STEPS is the number of steps to get to NODE_ID. - open_nodes = [(starting_node_id, 0)] - # A breadth-first search: - while open_nodes: - (id, steps) = open_nodes.pop(0) - steps += 1 - node = self[id] - for pred_id in node.pred_ids: - # Since the search is breadth-first, we only have to set steps - # that don't already exist. - if pred_id not in reachable_changesets: - reachable_changesets[pred_id] = (steps, id) - open_nodes.append((pred_id, steps)) - - # See if we can stop now: - if pred_id in stop_set: - return self._get_path( - reachable_changesets, starting_node_id, pred_id - ) - - return None - - def consume_nopred_nodes(self): - """Remove and yield changesets in dependency order. - - Each iteration, this generator yields a (changeset, time_range) - tuple for the oldest changeset in the graph that doesn't have any - predecessor nodes (i.e., it is ready to be committed). This is - continued until there are no more nodes without predecessors - (either because the graph has been emptied, or because of cycles - in the graph). - - Among the changesets that are ready to be processed, the earliest - one (according to the sorting of the TimeRange class) is yielded - each time. (This is the order in which the changesets should be - committed.) - - The graph should not be otherwise altered while this generator is - running.""" - - # Find a list of (node,changeset,) where the node has no - # predecessors: - nopred_nodes = _NoPredNodes(self._changeset_db) - for node in self.nodes.itervalues(): - if not node.pred_ids: - nopred_nodes.add(node) - - while nopred_nodes: - (node, changeset,) = nopred_nodes.get() - del self[node.id] - # See if any successors are now ready for extraction: - for succ_id in node.succ_ids: - succ = self[succ_id] - if not succ.pred_ids: - nopred_nodes.add(succ) - yield (changeset, node.time_range) - - def find_cycle(self, starting_node_id): - """Find a cycle in the dependency graph and return it. - - Use STARTING_NODE_ID as the place to start looking. This routine - must only be called after all nopred_nodes have been removed. - Return the list of changesets that are involved in the cycle - (ordered such that cycle[n-1] is a predecessor of cycle[n] and - cycle[-1] is a predecessor of cycle[0]).""" - - # Since there are no nopred nodes in the graph, all nodes in the - # graph must either be involved in a cycle or depend (directly or - # indirectly) on nodes that are in a cycle. - - # Pick an arbitrary node: - node = self[starting_node_id] - - seen_nodes = [node] - - # Follow it backwards until a node is seen a second time; then we - # have our cycle. - while True: - # Pick an arbitrary predecessor of node. It must exist, because - # there are no nopred nodes: - try: - node_id = node.pred_ids.__iter__().next() - except StopIteration: - raise NoPredNodeInGraphException(node) - node = self[node_id] - try: - i = seen_nodes.index(node) - except ValueError: - seen_nodes.append(node) - else: - seen_nodes = seen_nodes[i:] - seen_nodes.reverse() - return [self._changeset_db[node.id] for node in seen_nodes] - - def consume_graph(self, cycle_breaker=None): - """Remove and yield changesets from this graph in dependency order. - - Each iteration, this generator yields a (changeset, time_range) - tuple for the oldest changeset in the graph that doesn't have any - predecessor nodes. If CYCLE_BREAKER is specified, then call - CYCLE_BREAKER(cycle) whenever a cycle is encountered, where cycle - is the list of changesets that are involved in the cycle (ordered - such that cycle[n-1] is a predecessor of cycle[n] and cycle[-1] is - a predecessor of cycle[0]). CYCLE_BREAKER should break the cycle - in place then return. - - If a cycle is found and CYCLE_BREAKER was not specified, raise - CycleInGraphException.""" - - while True: - for (changeset, time_range) in self.consume_nopred_nodes(): - yield (changeset, time_range) - - # If there are any nodes left in the graph, then there must be - # at least one cycle. Find a cycle and process it. - - # This might raise StopIteration, but that indicates that the - # graph has been fully consumed, so we just let the exception - # escape. - start_node_id = self.nodes.iterkeys().next() - - cycle = self.find_cycle(start_node_id) - - if cycle_breaker is not None: - cycle_breaker(cycle) - else: - raise CycleInGraphException(cycle) - - def __repr__(self): - """For convenience only. The format is subject to change at any time.""" - - if self.nodes: - return 'ChangesetGraph:\n%s' \ - % ''.join([' %r\n' % node for node in self]) - else: - return 'ChangesetGraph:\n EMPTY\n' - - node_colors = { - RevisionChangeset : 'lightgreen', - OrderedChangeset : 'cyan', - BranchChangeset : 'orange', - TagChangeset : 'yellow', - } - - def output_coarse_dot(self, f): - """Output the graph in DOT format to file-like object f. - - Such a file can be rendered into a visual representation of the - graph using tools like graphviz. Include only changesets in the - graph, and the dependencies between changesets.""" - - f.write('digraph G {\n') - for node in self: - f.write( - ' C%x [style=filled, fillcolor=%s];\n' % ( - node.id, - self.node_colors[self._changeset_db[node.id].__class__], - ) - ) - f.write('\n') - - for node in self: - for succ_id in node.succ_ids: - f.write(' C%x -> C%x\n' % (node.id, succ_id,)) - f.write('\n') - - f.write('}\n') - - def output_fine_dot(self, f): - """Output the graph in DOT format to file-like object f. - - Such a file can be rendered into a visual representation of the - graph using tools like graphviz. Include all CVSItems and the - CVSItem-CVSItem dependencies in the graph. Group the CVSItems - into clusters by changeset.""" - - f.write('digraph G {\n') - for node in self: - f.write(' subgraph cluster_%x {\n' % (node.id,)) - f.write(' label = "C%x";\n' % (node.id,)) - changeset = self._changeset_db[node.id] - for item_id in changeset.cvs_item_ids: - f.write(' I%x;\n' % (item_id,)) - f.write(' style=filled;\n') - f.write( - ' fillcolor=%s;\n' - % (self.node_colors[self._changeset_db[node.id].__class__],)) - f.write(' }\n\n') - - for node in self: - changeset = self._changeset_db[node.id] - for cvs_item in changeset.iter_cvs_items(): - for succ_id in cvs_item.get_succ_ids(): - f.write(' I%x -> I%x;\n' % (cvs_item.id, succ_id,)) - - f.write('\n') - - f.write('}\n') - - diff --git a/cvs2svn_lib/changeset_graph_link.py b/cvs2svn_lib/changeset_graph_link.py deleted file mode 100644 index 9d0cc9d..0000000 --- a/cvs2svn_lib/changeset_graph_link.py +++ /dev/null @@ -1,149 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2006-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Keep track of counts of different types of changeset links.""" - - - -# A cvs_item doesn't depend on any cvs_items in either pred or succ: -LINK_NONE = 0 - -# A cvs_item depends on one or more cvs_items in pred but none in succ: -LINK_PRED = 1 - -# A cvs_item depends on one or more cvs_items in succ but none in pred: -LINK_SUCC = 2 - -# A cvs_item depends on one or more cvs_items in both pred and succ: -LINK_PASSTHRU = LINK_PRED | LINK_SUCC - - -class ChangesetGraphLink(object): - def __init__(self, pred, changeset, succ): - """Represent a link in a loop in a changeset graph. - - This is the link that goes from PRED -> CHANGESET -> SUCC. - - We are mainly concerned with how many CVSItems have LINK_PRED, - LINK_SUCC, and LINK_PASSTHRU type links to the neighboring - commitsets. If necessary, this class can also break up CHANGESET - into multiple changesets.""" - - self.pred = pred - self.pred_ids = set(pred.cvs_item_ids) - - self.changeset = changeset - - self.succ_ids = set(succ.cvs_item_ids) - self.succ = succ - - # A count of each type of link for cvs_items in changeset - # (indexed by LINK_* constants): - link_counts = [0] * 4 - - for cvs_item in list(changeset.iter_cvs_items()): - link_counts[self.get_link_type(cvs_item)] += 1 - - [self.pred_links, self.succ_links, self.passthru_links] = link_counts[1:] - - def get_link_type(self, cvs_item): - """Return the type of links from CVS_ITEM to self.PRED and self.SUCC. - - The return value is one of LINK_NONE, LINK_PRED, LINK_SUCC, or - LINK_PASSTHRU.""" - - retval = LINK_NONE - - if cvs_item.get_pred_ids() & self.pred_ids: - retval |= LINK_PRED - if cvs_item.get_succ_ids() & self.succ_ids: - retval |= LINK_SUCC - - return retval - - def get_links_to_move(self): - """Return the number of items that would be moved to split changeset.""" - - return min(self.pred_links, self.succ_links) \ - or max(self.pred_links, self.succ_links) - - def is_breakable(self): - """Return True iff breaking the changeset will do any good.""" - - return self.pred_links != 0 or self.succ_links != 0 - - def __cmp__(self, other): - """Compare SELF with OTHER in terms of which would be better to break. - - The one that is better to break is considered the lesser.""" - - return ( - - cmp(int(self.is_breakable()), int(other.is_breakable())) - or cmp(self.passthru_links, other.passthru_links) - or cmp(self.get_links_to_move(), other.get_links_to_move()) - ) - - def break_changeset(self, changeset_key_generator): - """Break up self.changeset and return the fragments. - - Break it up in such a way that the link is weakened as efficiently - as possible.""" - - if not self.is_breakable(): - raise ValueError('Changeset is not breakable: %r' % self.changeset) - - pred_items = [] - succ_items = [] - - # For each link type, should such CVSItems be moved to the - # changeset containing the predecessor items or the one containing - # the successor items? - destination = { - LINK_PRED : pred_items, - LINK_SUCC : succ_items, - } - - if self.pred_links == 0: - destination[LINK_NONE] = pred_items - destination[LINK_PASSTHRU] = pred_items - elif self.succ_links == 0: - destination[LINK_NONE] = succ_items - destination[LINK_PASSTHRU] = succ_items - elif self.pred_links < self.succ_links: - destination[LINK_NONE] = succ_items - destination[LINK_PASSTHRU] = succ_items - else: - destination[LINK_NONE] = pred_items - destination[LINK_PASSTHRU] = pred_items - - for cvs_item in self.changeset.iter_cvs_items(): - link_type = self.get_link_type(cvs_item) - destination[link_type].append(cvs_item.id) - - # Create new changesets of the same type as the old one: - return [ - self.changeset.create_split_changeset( - changeset_key_generator.gen_id(), pred_items), - self.changeset.create_split_changeset( - changeset_key_generator.gen_id(), succ_items), - ] - - def __str__(self): - return 'Link<%x>(%d, %d, %d)' % ( - self.changeset.id, - self.pred_links, self.succ_links, self.passthru_links) - - diff --git a/cvs2svn_lib/changeset_graph_node.py b/cvs2svn_lib/changeset_graph_node.py deleted file mode 100644 index cbbebd7..0000000 --- a/cvs2svn_lib/changeset_graph_node.py +++ /dev/null @@ -1,50 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2006-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""A node in the changeset dependency graph.""" - - -class ChangesetGraphNode(object): - """A node in the changeset dependency graph.""" - - __slots__ = ['id', 'time_range', 'pred_ids', 'succ_ids'] - - def __init__(self, changeset, time_range, pred_ids, succ_ids): - # The id of the ChangesetGraphNode is the same as the id of the - # changeset. - self.id = changeset.id - - # The range of times of CVSItems within this Changeset. - self.time_range = time_range - - # The set of changeset ids of changesets that are direct - # predecessors of this one. - self.pred_ids = pred_ids - - # The set of changeset ids of changesets that are direct - # successors of this one. - self.succ_ids = succ_ids - - def __repr__(self): - """For convenience only. The format is subject to change at any time.""" - - return '%x; pred=[%s]; succ=[%s]' % ( - self.id, - ','.join(['%x' % id for id in self.pred_ids]), - ','.join(['%x' % id for id in self.succ_ids]), - ) - - diff --git a/cvs2svn_lib/check_dependencies_pass.py b/cvs2svn_lib/check_dependencies_pass.py deleted file mode 100644 index 172c264..0000000 --- a/cvs2svn_lib/check_dependencies_pass.py +++ /dev/null @@ -1,144 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module defines some passes that can be used for debugging cv2svn.""" - - -from cvs2svn_lib import config -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.common import FatalException -from cvs2svn_lib.common import DB_OPEN_READ -from cvs2svn_lib.log import Log -from cvs2svn_lib.pass_manager import Pass -from cvs2svn_lib.project import read_projects -from cvs2svn_lib.artifact_manager import artifact_manager -from cvs2svn_lib.cvs_file_database import CVSFileDatabase -from cvs2svn_lib.symbol_database import SymbolDatabase -from cvs2svn_lib.cvs_item_database import OldCVSItemStore -from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore - - -class CheckDependenciesPass(Pass): - """Check that the dependencies are self-consistent.""" - - def __init__(self): - Pass.__init__(self) - - def register_artifacts(self): - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.CVS_FILES_DB) - - def iter_cvs_items(self): - raise NotImplementedError() - - def get_cvs_item(self, item_id): - raise NotImplementedError() - - def run(self, run_options, stats_keeper): - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) - self.symbol_db = SymbolDatabase() - Ctx()._symbol_db = self.symbol_db - - Log().quiet("Checking dependency consistency...") - - fatal_errors = [] - for cvs_item in self.iter_cvs_items(): - # Check that the pred_ids and succ_ids are mutually consistent: - for pred_id in cvs_item.get_pred_ids(): - pred = self.get_cvs_item(pred_id) - if not cvs_item.id in pred.get_succ_ids(): - fatal_errors.append( - '%s lists pred=%s, but not vice versa.' % (cvs_item, pred,)) - - for succ_id in cvs_item.get_succ_ids(): - succ = self.get_cvs_item(succ_id) - if not cvs_item.id in succ.get_pred_ids(): - fatal_errors.append( - '%s lists succ=%s, but not vice versa.' % (cvs_item, succ,)) - - if fatal_errors: - raise FatalException( - 'Dependencies inconsistent:\n' - '%s\n' - 'Exited due to fatal error(s).' - % ('\n'.join(fatal_errors),) - ) - - self.symbol_db.close() - self.symbol_db = None - Ctx()._cvs_file_db.close() - Log().quiet("Done") - - -class CheckItemStoreDependenciesPass(CheckDependenciesPass): - def __init__(self, cvs_items_store_file): - CheckDependenciesPass.__init__(self) - self.cvs_items_store_file = cvs_items_store_file - - def register_artifacts(self): - CheckDependenciesPass.register_artifacts(self) - self._register_temp_file_needed(self.cvs_items_store_file) - - def iter_cvs_items(self): - cvs_item_store = OldCVSItemStore( - artifact_manager.get_temp_file(self.cvs_items_store_file)) - - for cvs_file_items in cvs_item_store.iter_cvs_file_items(): - self.current_cvs_file_items = cvs_file_items - for cvs_item in cvs_file_items.values(): - yield cvs_item - - del self.current_cvs_file_items - - cvs_item_store.close() - - def get_cvs_item(self, item_id): - return self.current_cvs_file_items[item_id] - - -class CheckIndexedItemStoreDependenciesPass(CheckDependenciesPass): - def __init__(self, cvs_items_store_file, cvs_items_store_index_file): - CheckDependenciesPass.__init__(self) - self.cvs_items_store_file = cvs_items_store_file - self.cvs_items_store_index_file = cvs_items_store_index_file - - def register_artifacts(self): - CheckDependenciesPass.register_artifacts(self) - self._register_temp_file_needed(self.cvs_items_store_file) - self._register_temp_file_needed(self.cvs_items_store_index_file) - - def iter_cvs_items(self): - return self.cvs_item_store.itervalues() - - def get_cvs_item(self, item_id): - return self.cvs_item_store[item_id] - - def run(self, run_options, stats_keeper): - self.cvs_item_store = IndexedCVSItemStore( - artifact_manager.get_temp_file(self.cvs_items_store_file), - artifact_manager.get_temp_file(self.cvs_items_store_index_file), - DB_OPEN_READ) - - CheckDependenciesPass.run(self, run_options, stats_keeper) - - self.cvs_item_store.close() - self.cvs_item_store = None - - diff --git a/cvs2svn_lib/checkout_internal.py b/cvs2svn_lib/checkout_internal.py deleted file mode 100644 index fe28e0c..0000000 --- a/cvs2svn_lib/checkout_internal.py +++ /dev/null @@ -1,778 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2007-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes that implement the --use-internal-co option. - -The idea is to patch up the revisions' contents incrementally, thus -avoiding the huge number of process spawns and the O(n^2) overhead of -using 'co' and 'cvs'. - -InternalRevisionRecorder saves the RCS deltas and RCS revision trees -to databases. Notably, deltas from the trunk need to be reversed, as -CVS stores them so they apply from HEAD backwards. - -InternalRevisionExcluder copies the revision trees to a new database, -omitting excluded branches. - -InternalRevisionReader produces the revisions' contents on demand. To -generate the text for a typical revision, we need the revision's delta -text plus the fulltext of the previous revision. Therefore, we -maintain a checkout database containing a copy of the fulltext of any -revision for which subsequent revisions still need to be retrieved. -It is crucial to remove text from this database as soon as it is no -longer needed, to prevent it from growing enormous. - -There are two reasons that the text from a revision can be needed: (1) -because the revision itself still needs to be output to a dumpfile; -(2) because another revision needs it as the base of its delta. We -maintain a reference count for each revision, which includes *both* -possibilities. The first time a revision's text is needed, it is -generated by applying the revision's deltatext to the previous -revision's fulltext, and the resulting fulltext is stored in the -checkout database. Each time a revision's fulltext is retrieved, its -reference count is decremented. When the reference count goes to -zero, then the fulltext is deleted from the checkout database. - -The administrative data for managing this consists of one TextRecord -entry for each revision. Each TextRecord has an id, which is the same -id as used for the corresponding CVSRevision instance. It also -maintains a count of the times it is expected to be retrieved. -TextRecords come in several varieties: - -FullTextRecord -- Used for revisions whose fulltext is contained - directly in the RCS file, and therefore available during - CollectRevsPass (i.e., typically revision 1.1 of each file). - -DeltaTextRecord -- Used for revisions that are defined via a delta - relative to some other TextRecord. These records record the id of - the TextRecord that holds the base text against which the delta is - defined. When the text for a DeltaTextRecord is retrieved, the - DeltaTextRecord instance is deleted and a CheckedOutTextRecord - instance is created to take its place. - -CheckedOutTextRecord -- Used during OutputPass for a revision that - started out as a DeltaTextRecord, but has already been retrieved - (and therefore its fulltext is stored in the checkout database). - -While a file is being processed during CollectRevsPass, the fulltext -and deltas are stored to the delta database, and TextRecord instances -are created to keep track of things. The reference counts are all -initialized to zero. - -After CollectRevsPass has done any preliminary tree mangling, its -_FileDataCollector.parse_completed(), method calls -RevisionRecorder.finish_file(), passing it the CVSFileItems instance -that describes the revisions in the file. At this point the reference -counts for the file's TextRecords are updated: each record referred to -by a delta has its refcount incremented, and each record that -corresponds to a non-delete CVSRevision is incremented. After that, -any records with refcount==0 are removed. When one record is removed, -that can cause another record's reference count to go to zero and be -removed too, recursively. When a TextRecord is deleted at this stage, -its deltatext is also deleted from the delta database. - -In FilterSymbolsPass, the exact same procedure (described in the -previous paragraph) is repeated, but this time using the CVSFileItems -after it has been updated for excluded symbols, symbol -preferred-parent grafting, etc.""" - - -import cStringIO -import re -import time - -from cvs2svn_lib import config -from cvs2svn_lib.common import DB_OPEN_NEW -from cvs2svn_lib.common import DB_OPEN_READ -from cvs2svn_lib.common import warning_prefix -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.log import Log -from cvs2svn_lib.artifact_manager import artifact_manager -from cvs2svn_lib.symbol import Trunk -from cvs2svn_lib.cvs_item import CVSRevisionModification -from cvs2svn_lib.database import Database -from cvs2svn_lib.database import IndexedDatabase -from cvs2svn_lib.rcs_stream import RCSStream -from cvs2svn_lib.rcs_stream import MalformedDeltaException -from cvs2svn_lib.revision_manager import RevisionRecorder -from cvs2svn_lib.revision_manager import RevisionExcluder -from cvs2svn_lib.revision_manager import RevisionReader -from cvs2svn_lib.serializer import MarshalSerializer -from cvs2svn_lib.serializer import CompressingSerializer -from cvs2svn_lib.serializer import PrimedPickleSerializer - - -class TextRecord(object): - """Bookkeeping data for the text of a single CVSRevision.""" - - __slots__ = ['id', 'refcount'] - - def __init__(self, id): - # The cvs_rev_id of the revision whose text this is. - self.id = id - - # The number of times that the text of this revision will be - # retrieved. - self.refcount = 0 - - def __getstate__(self): - return (self.id, self.refcount,) - - def __setstate__(self, state): - (self.id, self.refcount,) = state - - def increment_dependency_refcounts(self, text_record_db): - """Increment the refcounts of any records that this one depends on.""" - - pass - - def decrement_refcount(self, text_record_db): - """Decrement the number of times our text still has to be checked out. - - If the reference count goes to zero, call discard().""" - - self.refcount -= 1 - if self.refcount == 0: - text_record_db.discard(self.id) - - def checkout(self, text_record_db): - """Workhorse of the checkout process. - - Return the text for this revision, decrement our reference count, - and update the databases depending on whether there will be future - checkouts.""" - - raise NotImplementedError() - - def free(self, text_record_db): - """This instance will never again be checked out; free it. - - Also free any associated resources and decrement the refcounts of - any other TextRecords that this one depends on.""" - - raise NotImplementedError() - - -class FullTextRecord(TextRecord): - __slots__ = [] - - def __getstate__(self): - return (self.id, self.refcount,) - - def __setstate__(self, state): - (self.id, self.refcount,) = state - - def checkout(self, text_record_db): - text = text_record_db.delta_db[self.id] - self.decrement_refcount(text_record_db) - return text - - def free(self, text_record_db): - del text_record_db.delta_db[self.id] - - def __str__(self): - return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,) - - -class DeltaTextRecord(TextRecord): - __slots__ = ['pred_id'] - - def __init__(self, id, pred_id): - TextRecord.__init__(self, id) - - # The cvs_rev_id of the revision relative to which this delta is - # defined. - self.pred_id = pred_id - - def __getstate__(self): - return (self.id, self.refcount, self.pred_id,) - - def __setstate__(self, state): - (self.id, self.refcount, self.pred_id,) = state - - def increment_dependency_refcounts(self, text_record_db): - text_record_db[self.pred_id].refcount += 1 - - def checkout(self, text_record_db): - base_text = text_record_db[self.pred_id].checkout(text_record_db) - co = RCSStream(base_text) - delta_text = text_record_db.delta_db[self.id] - co.apply_diff(delta_text) - text = co.get_text() - del co - self.refcount -= 1 - if self.refcount == 0: - # This text will never be needed again; just delete ourselves - # without ever having stored the fulltext to the checkout - # database: - del text_record_db[self.id] - else: - # Store a new CheckedOutTextRecord in place of ourselves: - text_record_db.checkout_db['%x' % self.id] = text - new_text_record = CheckedOutTextRecord(self.id) - new_text_record.refcount = self.refcount - text_record_db.replace(new_text_record) - return text - - def free(self, text_record_db): - del text_record_db.delta_db[self.id] - text_record_db[self.pred_id].decrement_refcount(text_record_db) - - def __str__(self): - return 'DeltaTextRecord(%x -> %x, %d)' \ - % (self.pred_id, self.id, self.refcount,) - - -class CheckedOutTextRecord(TextRecord): - __slots__ = [] - - def __getstate__(self): - return (self.id, self.refcount,) - - def __setstate__(self, state): - (self.id, self.refcount,) = state - - def checkout(self, text_record_db): - text = text_record_db.checkout_db['%x' % self.id] - self.decrement_refcount(text_record_db) - return text - - def free(self, text_record_db): - del text_record_db.checkout_db['%x' % self.id] - - def __str__(self): - return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,) - - -class NullDatabase(object): - """A do-nothing database that can be used with TextRecordDatabase. - - Use this when you don't actually want to allow anything to be - deleted.""" - - def __delitem__(self, id): - pass - - -class TextRecordDatabase: - """Holds the TextRecord instances that are currently live. - - During CollectRevsPass and FilterSymbolsPass, files are processed - one by one and a new TextRecordDatabase instance is used for each - file. During OutputPass, a single TextRecordDatabase instance is - used for the duration of OutputPass; individual records are added - and removed when they are active.""" - - def __init__(self, delta_db, checkout_db): - # A map { cvs_rev_id -> TextRecord }. - self.text_records = {} - - # A database-like object using cvs_rev_ids as keys and containing - # fulltext/deltatext strings as values. Its __getitem__() method - # is used to retrieve deltas when they are needed, and its - # __delitem__() method is used to delete deltas when they can be - # freed. The modifiability of the delta database varies from pass - # to pass, so the object stored here varies as well: - # - # CollectRevsPass: a fully-functional IndexedDatabase. This - # allows deltas that will not be needed to be deleted. - # - # FilterSymbolsPass: a NullDatabase. The delta database cannot be - # modified during this pass, and we have no need to retrieve - # deltas, so we just use a dummy object here. - # - # OutputPass: a disabled IndexedDatabase. During this pass we - # need to retrieve deltas, but we are not allowed to modify the - # delta database. So we use an IndexedDatabase whose __del__() - # method has been disabled to do nothing. - self.delta_db = delta_db - - # A database-like object using cvs_rev_ids as keys and containing - # fulltext strings as values. This database is only set during - # OutputPass. - self.checkout_db = checkout_db - - # If this is set to a list, then the list holds the ids of - # text_records that have to be deleted; when discard() is called, - # it adds the requested id to the list but does not delete it. If - # this member is set to None, then text_records are deleted - # immediately when discard() is called. - self.deferred_deletes = None - - def __getstate__(self): - return (self.text_records.values(),) - - def __setstate__(self, state): - (text_records,) = state - self.text_records = {} - for text_record in text_records: - self.add(text_record) - self.delta_db = NullDatabase() - self.checkout_db = NullDatabase() - self.deferred_deletes = None - - def add(self, text_record): - """Add TEXT_RECORD to our database. - - There must not already be a record with the same id.""" - - assert not self.text_records.has_key(text_record.id) - - self.text_records[text_record.id] = text_record - - def __getitem__(self, id): - return self.text_records[id] - - def __delitem__(self, id): - """Free the record with the specified ID.""" - - del self.text_records[id] - - def replace(self, text_record): - """Store TEXT_RECORD in place of the existing record with the same id. - - Do not do anything with the old record.""" - - assert self.text_records.has_key(text_record.id) - self.text_records[text_record.id] = text_record - - def discard(self, *ids): - """The text records with IDS are no longer needed; discard them. - - This involves calling their free() methods and also removing them - from SELF. - - If SELF.deferred_deletes is not None, then the ids to be deleted - are added to the list instead of deleted immediately. This - mechanism is to prevent a stack overflow from the avalanche of - deletes that can result from deleting a long chain of revisions.""" - - if self.deferred_deletes is None: - # This is an outer-level delete. - self.deferred_deletes = list(ids) - while self.deferred_deletes: - id = self.deferred_deletes.pop() - text_record = self[id] - if text_record.refcount != 0: - raise InternalError( - 'TextRecordDatabase.discard(%s) called with refcount = %d' - % (text_record, text_record.refcount,) - ) - # This call might cause other text_record ids to be added to - # self.deferred_deletes: - text_record.free(self) - del self[id] - self.deferred_deletes = None - else: - self.deferred_deletes.extend(ids) - - def itervalues(self): - return self.text_records.itervalues() - - def recompute_refcounts(self, cvs_file_items): - """Recompute the refcounts of the contained TextRecords. - - Use CVS_FILE_ITEMS to determine which records will be needed by - cvs2svn.""" - - # First clear all of the refcounts: - for text_record in self.itervalues(): - text_record.refcount = 0 - - # Now increment the reference count of records that are needed as - # the source of another record's deltas: - for text_record in self.itervalues(): - text_record.increment_dependency_refcounts(self.text_records) - - # Now increment the reference count of records that will be needed - # by cvs2svn: - for lod_items in cvs_file_items.iter_lods(): - for cvs_rev in lod_items.cvs_revisions: - if isinstance(cvs_rev, CVSRevisionModification): - self[cvs_rev.id].refcount += 1 - - def free_unused(self): - """Free any TextRecords whose reference counts are zero.""" - - # The deletion of some of these text records might cause others to - # be unused, in which case they will be deleted automatically. - # But since the initially-unused records are not referred to by - # any others, we don't have to be afraid that they will be deleted - # before we get to them. But it *is* crucial that we create the - # whole unused list before starting the loop. - - unused = [ - text_record.id - for text_record in self.itervalues() - if text_record.refcount == 0 - ] - - self.discard(*unused) - - def log_leftovers(self): - """If any TextRecords still exist, log them.""" - - if self.text_records: - Log().warn( - "%s: internal problem: leftover revisions in the checkout cache:" - % warning_prefix) - for text_record in self.itervalues(): - Log().warn(' %s' % (text_record,)) - - def __repr__(self): - """Debugging output of the current contents of the TextRecordDatabase.""" - - retval = ['TextRecordDatabase:'] - for text_record in self.itervalues(): - retval.append(' %s' % (text_record,)) - return '\n'.join(retval) - - -class InternalRevisionRecorder(RevisionRecorder): - """A RevisionRecorder that reconstructs the fulltext internally.""" - - def __init__(self, compress): - RevisionRecorder.__init__(self) - self._compress = compress - - def register_artifacts(self, which_pass): - artifact_manager.register_temp_file( - config.RCS_DELTAS_INDEX_TABLE, which_pass - ) - artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass) - artifact_manager.register_temp_file( - config.RCS_TREES_INDEX_TABLE, which_pass - ) - artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass) - - def start(self): - ser = MarshalSerializer() - if self._compress: - ser = CompressingSerializer(ser) - self._rcs_deltas = IndexedDatabase( - artifact_manager.get_temp_file(config.RCS_DELTAS_STORE), - artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE), - DB_OPEN_NEW, ser) - primer = (FullTextRecord, DeltaTextRecord) - self._rcs_trees = IndexedDatabase( - artifact_manager.get_temp_file(config.RCS_TREES_STORE), - artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE), - DB_OPEN_NEW, PrimedPickleSerializer(primer)) - - def start_file(self, cvs_file_items): - self._cvs_file_items = cvs_file_items - - # A map from cvs_rev_id to TextRecord instance: - self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase()) - - def record_text(self, cvs_rev, log, text): - if isinstance(cvs_rev.lod, Trunk): - # On trunk, revisions are encountered in reverse order (1. - # ... 1.1) and deltas are inverted. The first text that we see - # is the fulltext for the HEAD revision. After that, the text - # corresponding to revision 1.N is the delta (1. -> - # 1.)). We have to invert the deltas here so that we can - # read the revisions out in dependency order; that is, for - # revision 1.1 we want the fulltext, and for revision 1. we - # want the delta (1. -> 1.). This means that we can't - # compute the delta for a revision until we see its logical - # parent. When we finally see revision 1.1 (which is recognized - # because it doesn't have a parent), we can record the diff (1.1 - # -> 1.2) for revision 1.2, and also the fulltext for 1.1. - - if cvs_rev.next_id is None: - # This is HEAD, as fulltext. Initialize the RCSStream so - # that we can compute deltas backwards in time. - self._stream = RCSStream(text) - else: - # Any other trunk revision is a backward delta. Apply the - # delta to the RCSStream to mutate it to the contents of this - # revision, and also to get the reverse delta, which we store - # as the forward delta of our child revision. - try: - text = self._stream.invert_diff(text) - except MalformedDeltaException, (msg): - Log().error('Malformed RCS delta in %s, revision %s: %s' - % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, - msg)) - raise RuntimeError - text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id) - self._writeout(text_record, text) - - if cvs_rev.prev_id is None: - # This is revision 1.1. Write its fulltext: - text_record = FullTextRecord(cvs_rev.id) - self._writeout(text_record, self._stream.get_text()) - - # There will be no more trunk revisions delivered, so free the - # RCSStream. - del self._stream - - else: - # On branches, revisions are encountered in logical order - # (.1 ... .) and the text corresponding to - # revision . is the forward delta (. -> - # .). That's what we need, so just store it. - - # FIXME: It would be nice to avoid writing out branch deltas - # when --trunk-only. (They will be deleted when finish_file() - # is called, but if the delta db is in an IndexedDatabase the - # deletions won't actually recover any disk space.) - text_record = DeltaTextRecord(cvs_rev.id, cvs_rev.prev_id) - self._writeout(text_record, text) - - return None - - def _writeout(self, text_record, text): - self.text_record_db.add(text_record) - self._rcs_deltas[text_record.id] = text - - def finish_file(self, cvs_file_items): - """Finish processing of the current file. - - Compute the initial text record refcounts, discard any records - that are unneeded, and store the text records for the file to the - _rcs_trees database.""" - - # Delete our copy of the preliminary CVSFileItems: - del self._cvs_file_items - - self.text_record_db.recompute_refcounts(cvs_file_items) - self.text_record_db.free_unused() - self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db - del self.text_record_db - - def finish(self): - self._rcs_deltas.close() - self._rcs_trees.close() - - -class InternalRevisionExcluder(RevisionExcluder): - """The RevisionExcluder used by InternalRevisionReader.""" - - def register_artifacts(self, which_pass): - artifact_manager.register_temp_file_needed( - config.RCS_TREES_STORE, which_pass - ) - artifact_manager.register_temp_file_needed( - config.RCS_TREES_INDEX_TABLE, which_pass - ) - artifact_manager.register_temp_file( - config.RCS_TREES_FILTERED_STORE, which_pass - ) - artifact_manager.register_temp_file( - config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass - ) - - def start(self): - self._tree_db = IndexedDatabase( - artifact_manager.get_temp_file(config.RCS_TREES_STORE), - artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE), - DB_OPEN_READ) - primer = (FullTextRecord, DeltaTextRecord) - self._new_tree_db = IndexedDatabase( - artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE), - artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE), - DB_OPEN_NEW, PrimedPickleSerializer(primer)) - - def process_file(self, cvs_file_items): - text_record_db = self._tree_db[cvs_file_items.cvs_file.id] - text_record_db.recompute_refcounts(cvs_file_items) - text_record_db.free_unused() - self._new_tree_db[cvs_file_items.cvs_file.id] = text_record_db - - def finish(self): - self._tree_db.close() - self._new_tree_db.close() - - -class _KeywordExpander: - """A class whose instances provide substitutions for CVS keywords. - - This class is used via its __call__() method, which should be called - with a match object representing a match for a CVS keyword string. - The method returns the replacement for the matched text. - - The __call__() method works by calling the method with the same name - as that of the CVS keyword (converted to lower case). - - Instances of this class can be passed as the REPL argument to - re.sub().""" - - date_fmt_old = "%Y/%m/%d %H:%M:%S" # CVS 1.11, rcs - date_fmt_new = "%Y-%m-%d %H:%M:%S" # CVS 1.12 - - date_fmt = date_fmt_new - - @classmethod - def use_old_date_format(klass): - """Class method to ensure exact compatibility with CVS 1.11 - output. Use this if you want to verify your conversion and you're - using CVS 1.11.""" - klass.date_fmt = klass.date_fmt_old - - def __init__(self, cvs_rev): - self.cvs_rev = cvs_rev - - def __call__(self, match): - return '$%s: %s $' % \ - (match.group(1), getattr(self, match.group(1).lower())(),) - - def author(self): - return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author - - def date(self): - return time.strftime(self.date_fmt, - time.gmtime(self.cvs_rev.timestamp)) - - def header(self): - return '%s %s %s %s Exp' % \ - (self.source(), self.cvs_rev.rev, self.date(), self.author()) - - def id(self): - return '%s %s %s %s Exp' % \ - (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author()) - - def locker(self): - # Handle kvl like kv, as a converted repo is supposed to have no - # locks. - return '' - - def log(self): - # Would need some special handling. - return 'not supported by cvs2svn' - - def name(self): - # Cannot work, as just creating a new symbol does not check out - # the revision again. - return 'not supported by cvs2svn' - - def rcsfile(self): - return self.cvs_rev.cvs_file.basename + ",v" - - def revision(self): - return self.cvs_rev.rev - - def source(self): - project = self.cvs_rev.cvs_file.project - return project.cvs_repository_root + '/' + project.cvs_module + \ - self.cvs_rev.cvs_file.cvs_path + ",v" - - def state(self): - # We check out only live revisions. - return 'Exp' - - -class InternalRevisionReader(RevisionReader): - """A RevisionReader that reads the contents from an own delta store.""" - - _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State' - _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$') - _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$') - - def __init__(self, compress): - self._compress = compress - - def register_artifacts(self, which_pass): - artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass) - artifact_manager.register_temp_file_needed( - config.RCS_DELTAS_STORE, which_pass - ) - artifact_manager.register_temp_file_needed( - config.RCS_DELTAS_INDEX_TABLE, which_pass - ) - artifact_manager.register_temp_file_needed( - config.RCS_TREES_FILTERED_STORE, which_pass - ) - artifact_manager.register_temp_file_needed( - config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass - ) - - def start(self): - self._delta_db = IndexedDatabase( - artifact_manager.get_temp_file(config.RCS_DELTAS_STORE), - artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE), - DB_OPEN_READ) - self._delta_db.__delitem__ = lambda id: None - self._tree_db = IndexedDatabase( - artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE), - artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE), - DB_OPEN_READ) - ser = MarshalSerializer() - if self._compress: - ser = CompressingSerializer(ser) - self._co_db = Database( - artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW, - ser) - - # The set of CVSFile instances whose TextRecords have already been - # read: - self._loaded_files = set() - - # A map { CVSFILE : _FileTree } for files that currently have live - # revisions: - self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db) - - def _get_text_record(self, cvs_rev): - """Return the TextRecord instance for CVS_REV. - - If the TextRecords for CVS_REV.cvs_file haven't been loaded yet, - do so now.""" - - if cvs_rev.cvs_file not in self._loaded_files: - for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues(): - self._text_record_db.add(text_record) - self._loaded_files.add(cvs_rev.cvs_file) - - return self._text_record_db[cvs_rev.id] - - def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False): - """Check out the text for revision C_REV from the repository. - - Return the text wrapped in a readable file object. If - SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be - _un_expanded prior to returning the file content. Note that $Log$ - never actually generates a log (which makes test 'requires_cvs()' - fail). - - Revisions may be requested in any order, but if they are not - requested in dependency order the checkout database will become - very large. Revisions may be skipped. Each revision may be - requested only once.""" - - try: - text = self._get_text_record(cvs_rev).checkout(self._text_record_db) - except MalformedDeltaException, (msg): - raise FatalError('Malformed RCS delta in %s, revision %s: %s' - % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg)) - if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o': - if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k': - text = self._kw_re.sub(r'$\1$', text) - else: - text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text) - - return cStringIO.StringIO(text) - - def finish(self): - self._text_record_db.log_leftovers() - - del self._text_record_db - self._delta_db.close() - self._tree_db.close() - self._co_db.close() - diff --git a/cvs2svn_lib/collect_data.py b/cvs2svn_lib/collect_data.py deleted file mode 100644 index 160d7b9..0000000 --- a/cvs2svn_lib/collect_data.py +++ /dev/null @@ -1,1431 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Data collection classes. - -This module contains the code used to collect data from the CVS -repository. It parses *,v files, recording all useful information -except for the actual file contents (though even the file contents -might be recorded by the RevisionRecorder if one is configured). - -As a *,v file is parsed, the information pertaining to the file is -accumulated in memory, mostly in _RevisionData, _BranchData, and -_TagData objects. When parsing is complete, a final pass is made over -the data to create some final dependency links, collect statistics, -etc., then the _*Data objects are converted into CVSItem objects -(CVSRevision, CVSBranch, and CVSTag respectively) and the CVSItems are -dumped into databases. - -During the data collection, persistent unique ids are allocated to -many types of objects: CVSFile, Symbol, and CVSItems. CVSItems are a -special case. CVSItem ids are unique across all CVSItem types, and -the ids are carried over from the corresponding data collection -objects: - - _RevisionData -> CVSRevision - - _BranchData -> CVSBranch - - _TagData -> CVSTag - -In a later pass it is possible to convert tags <-> branches. But even -if this occurs, the new branch or tag uses the same id as the old tag -or branch. - -""" - - -import os -import stat -import re - -from cvs2svn_lib import config -from cvs2svn_lib.common import DB_OPEN_NEW -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import warning_prefix -from cvs2svn_lib.common import error_prefix -from cvs2svn_lib.common import IllegalSVNPathError -from cvs2svn_lib.common import verify_svn_filename_legal -from cvs2svn_lib.log import Log -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.artifact_manager import artifact_manager -from cvs2svn_lib.project import FileInAndOutOfAtticException -from cvs2svn_lib.cvs_file import CVSPath -from cvs2svn_lib.cvs_file import CVSDirectory -from cvs2svn_lib.cvs_file import CVSFile -from cvs2svn_lib.symbol import Symbol -from cvs2svn_lib.symbol import Trunk -from cvs2svn_lib.cvs_item import CVSRevision -from cvs2svn_lib.cvs_item import CVSBranch -from cvs2svn_lib.cvs_item import CVSTag -from cvs2svn_lib.cvs_item import cvs_revision_type_map -from cvs2svn_lib.cvs_file_items import VendorBranchError -from cvs2svn_lib.cvs_file_items import CVSFileItems -from cvs2svn_lib.key_generator import KeyGenerator -from cvs2svn_lib.cvs_item_database import NewCVSItemStore -from cvs2svn_lib.symbol_statistics import SymbolStatisticsCollector -from cvs2svn_lib.metadata_database import MetadataDatabase -from cvs2svn_lib.metadata_database import MetadataLogger - -import cvs2svn_rcsparse - - -# A regular expression defining "valid" revision numbers (used to -# check that symbol definitions are reasonable). -_valid_revision_re = re.compile(r''' - ^ - (?:\d+\.)+ # Digit groups with trailing dots - \d+ # And the last digit group. - $ - ''', re.VERBOSE) - -_branch_revision_re = re.compile(r''' - ^ - ((?:\d+\.\d+\.)+) # A nonzero even number of digit groups w/trailing dot - (?:0\.)? # CVS sticks an extra 0 here; RCS does not - (\d+) # And the last digit group - $ - ''', re.VERBOSE) - - -def rev_tuple(rev): - """Return a tuple of integers corresponding to revision number REV. - - For example, if REV is '1.2.3.4', then return (1,2,3,4).""" - - return tuple([int(x) for x in rev.split('.')]) - - -def is_trunk_revision(rev): - """Return True iff REV is a trunk revision. - - REV is a revision number corresponding to a specific revision (i.e., - not a whole branch).""" - - return rev.count('.') == 1 - - -def is_branch_revision_number(rev): - """Return True iff REV is a branch revision number. - - REV is a CVS revision number in canonical form (i.e., with zeros - removed). Return True iff it refers to a whole branch, as opposed - to a single revision.""" - - return rev.count('.') % 2 == 0 - - -def is_same_line_of_development(rev1, rev2): - """Return True if rev1 and rev2 are on the same line of - development (i.e., both on trunk, or both on the same branch); - return False otherwise. Either rev1 or rev2 can be None, in - which case automatically return False.""" - - if rev1 is None or rev2 is None: - return False - if rev1.count('.') == 1 and rev2.count('.') == 1: - return True - if rev1[0:rev1.rfind('.')] == rev2[0:rev2.rfind('.')]: - return True - return False - - -class _RevisionData: - """We track the state of each revision so that in set_revision_info, - we can determine if our op is an add/change/delete. We can do this - because in set_revision_info, we'll have all of the _RevisionData - for a file at our fingertips, and we need to examine the state of - our prev_rev to determine if we're an add or a change. Without the - state of the prev_rev, we are unable to distinguish between an add - and a change.""" - - def __init__(self, cvs_rev_id, rev, timestamp, author, state): - # The id of this revision: - self.cvs_rev_id = cvs_rev_id - self.rev = rev - self.timestamp = timestamp - self.author = author - self.original_timestamp = timestamp - self.state = state - - # If this is the first revision on a branch, then this is the - # branch_data of that branch; otherwise it is None. - self.parent_branch_data = None - - # The revision number of the parent of this revision along the - # same line of development, if any. For the first revision R on a - # branch, we consider the revision from which R sprouted to be the - # 'parent'. If this is the root revision in the file's revision - # tree, then this field is None. - # - # Note that this revision can't be determined arithmetically (due - # to cvsadmin -o), which is why this field is necessary. - self.parent = None - - # The revision number of the primary child of this revision (the - # child along the same line of development), if any; otherwise, - # None. - self.child = None - - # The _BranchData instances of branches that sprout from this - # revision, sorted in ascending order by branch number. It would - # be inconvenient to initialize it here because we would have to - # scan through all branches known by the _SymbolDataCollector to - # find the ones having us as the parent. Instead, this - # information is filled in by - # _FileDataCollector._resolve_dependencies() and sorted by - # _FileDataCollector._sort_branches(). - self.branches_data = [] - - # The revision numbers of the first commits on any branches on - # which commits occurred. This dependency is kept explicitly - # because otherwise a revision-only topological sort would miss - # the dependency that exists via branches_data. - self.branches_revs_data = [] - - # The _TagData instances of tags that are connected to this - # revision. - self.tags_data = [] - - # A token that may be returned from - # RevisionRecorder.record_text(). It can be used by - # RevisionReader to obtain the text again. - self.revision_recorder_token = None - - def get_first_on_branch_id(self): - return self.parent_branch_data and self.parent_branch_data.id - - -class _SymbolData: - """Collection area for information about a symbol in a single CVSFile. - - SYMBOL is an instance of Symbol, undifferentiated as a Branch or a - Tag regardless of whether self is a _BranchData or a _TagData.""" - - def __init__(self, id, symbol): - """Initialize an object for SYMBOL.""" - - # The unique id that will be used for this particular symbol in - # this particular file. This same id will be used for the CVSItem - # that is derived from this instance. - self.id = id - - # An instance of Symbol. - self.symbol = symbol - - -class _BranchData(_SymbolData): - """Collection area for information about a Branch in a single CVSFile.""" - - def __init__(self, id, symbol, branch_number): - _SymbolData.__init__(self, id, symbol) - - # The branch number (e.g., '1.5.2') of this branch. - self.branch_number = branch_number - - # The revision number of the revision from which this branch - # sprouts (e.g., '1.5'). - self.parent = self.branch_number[:self.branch_number.rindex(".")] - - # The revision number of the first commit on this branch, if any - # (e.g., '1.5.2.1'); otherwise, None. - self.child = None - - -class _TagData(_SymbolData): - """Collection area for information about a Tag in a single CVSFile.""" - - def __init__(self, id, symbol, rev): - _SymbolData.__init__(self, id, symbol) - - # The revision number being tagged (e.g., '1.5.2.3'). - self.rev = rev - - -class _SymbolDataCollector(object): - """Collect information about symbols in a single CVSFile.""" - - def __init__(self, fdc, cvs_file): - self.fdc = fdc - self.cvs_file = cvs_file - - self.pdc = self.fdc.pdc - self.collect_data = self.fdc.collect_data - - # A list [(name, revision), ...] of symbols defined in the header - # of the file. The name has already been transformed using the - # symbol transform rules. If the symbol transform rules indicate - # that the symbol should be ignored, then it is never added to - # this list. This list is processed then deleted in - # process_symbols(). - self._symbol_defs = [] - - # A set containing the transformed names of symbols in this file - # (used to detect duplicats during processing of unlabeled - # branches): - self._defined_symbols = set() - - # Map { branch_number : _BranchData }, where branch_number has an - # odd number of digits. - self.branches_data = { } - - # Map { revision : [ tag_data ] }, where revision has an even - # number of digits, and the value is a list of _TagData objects - # for tags that apply to that revision. - self.tags_data = { } - - def _add_branch(self, name, branch_number): - """Record that BRANCH_NUMBER is the branch number for branch NAME, - and derive and record the revision from which NAME sprouts. - BRANCH_NUMBER is an RCS branch number with an odd number of - components, for example '1.7.2' (never '1.7.0.2'). Return the - _BranchData instance (which is usually newly-created).""" - - branch_data = self.branches_data.get(branch_number) - - if branch_data is not None: - Log().warn( - "%s: in '%s':\n" - " branch '%s' already has name '%s',\n" - " cannot also have name '%s', ignoring the latter\n" - % (warning_prefix, - self.cvs_file.filename, branch_number, - branch_data.symbol.name, name) - ) - return branch_data - - symbol = self.pdc.get_symbol(name) - branch_data = _BranchData( - self.collect_data.item_key_generator.gen_id(), symbol, branch_number - ) - self.branches_data[branch_number] = branch_data - return branch_data - - def _construct_distinct_name(self, name, original_name): - """Construct a distinct symbol name from NAME. - - If NAME is distinct, return it. If it is already used in this - file (as determined from its presence in self._defined_symbols), - construct and return a new name that is not already used.""" - - if name not in self._defined_symbols: - return name - else: - index = 1 - while True: - dup_name = '%s-DUPLICATE-%d' % (name, index,) - if dup_name not in self._defined_symbols: - self.collect_data.record_fatal_error( - "Symbol name '%s' is already used in '%s'.\n" - "The unlabeled branch '%s' must be renamed using " - "--symbol-transform." - % (name, self.cvs_file.filename, original_name,) - ) - return dup_name - - def _add_unlabeled_branch(self, branch_number): - original_name = "unlabeled-" + branch_number - name = self.transform_symbol(original_name, branch_number) - if name is None: - self.collect_data.record_fatal_error( - "The unlabeled branch '%s' in '%s' contains commits.\n" - "It may not be ignored via a symbol transform. (Use --exclude " - "instead.)" - % (original_name, self.cvs_file.filename,) - ) - # Retain the original name to allow the conversion to continue: - name = original_name - - distinct_name = self._construct_distinct_name(name, original_name) - self._defined_symbols.add(distinct_name) - return self._add_branch(distinct_name, branch_number) - - def _add_tag(self, name, revision): - """Record that tag NAME refers to the specified REVISION.""" - - symbol = self.pdc.get_symbol(name) - tag_data = _TagData( - self.collect_data.item_key_generator.gen_id(), symbol, revision - ) - self.tags_data.setdefault(revision, []).append(tag_data) - return tag_data - - def transform_symbol(self, name, revision): - """Transform a symbol according to the project's symbol transforms. - - Transform the symbol with the original name NAME and canonicalized - revision number REVISION. Return the new symbol name or None if - the symbol should be ignored entirely. - - Log the results of the symbol transform if necessary.""" - - old_name = name - # Apply any user-defined symbol transforms to the symbol name: - name = self.cvs_file.project.transform_symbol( - self.cvs_file, name, revision - ) - - if name is None: - # Ignore symbol: - self.pdc.log_symbol_transform(old_name, None) - Log().verbose( - " symbol '%s'=%s ignored in %s" - % (old_name, revision, self.cvs_file.filename,) - ) - else: - if name != old_name: - self.pdc.log_symbol_transform(old_name, name) - Log().verbose( - " symbol '%s'=%s transformed to '%s' in %s" - % (old_name, revision, name, self.cvs_file.filename,) - ) - - return name - - def define_symbol(self, name, revision): - """Record a symbol definition for later processing.""" - - # Canonicalize the revision number: - revision = _branch_revision_re.sub(r'\1\2', revision) - - # Apply any user-defined symbol transforms to the symbol name: - name = self.transform_symbol(name, revision) - - if name is not None: - # Verify that the revision number is valid: - if _valid_revision_re.match(revision): - # The revision number is valid; record it for later processing: - self._symbol_defs.append( (name, revision) ) - else: - Log().warn( - 'In %r:\n' - ' branch %r references invalid revision %s\n' - ' and will be ignored.' - % (self.cvs_file.filename, name, revision,) - ) - - def _eliminate_trivial_duplicate_defs(self, symbol_defs): - """Iterate through SYMBOL_DEFS, Removing identical duplicate definitions. - - Duplicate definitions of symbol names have been seen in the wild, - and they can also happen when --symbol-transform is used. If a - symbol is defined to the same revision number repeatedly, then - ignore all but the last definition.""" - - # Make a copy, since we have to iterate through the definitions - # twice: - symbol_defs = list(symbol_defs) - - # A map { (name, revision) : [index,...] } of the indexes where - # symbol definitions name=revision were found: - known_definitions = {} - for (i, symbol_def) in enumerate(symbol_defs): - known_definitions.setdefault(symbol_def, []).append(i) - - # A set of the indexes of entries that have to be removed from - # symbol_defs: - dup_indexes = set() - for ((name, revision), indexes) in known_definitions.iteritems(): - if len(indexes) > 1: - Log().verbose( - "in %r:\n" - " symbol %s:%s defined multiple times; ignoring duplicates\n" - % (self.cvs_file.filename, name, revision,) - ) - dup_indexes.update(indexes[:-1]) - - for (i, symbol_def) in enumerate(symbol_defs): - if i not in dup_indexes: - yield symbol_def - - def _process_duplicate_defs(self, symbol_defs): - """Iterate through SYMBOL_DEFS, processing duplicate names. - - Duplicate definitions of symbol names have been seen in the wild, - and they can also happen when --symbol-transform is used. If a - symbol is defined multiple times, then it is a fatal error. This - method should be called after _eliminate_trivial_duplicate_defs().""" - - # Make a copy, since we have to access multiple times: - symbol_defs = list(symbol_defs) - - # A map {name : [index,...]} mapping the names of symbols to a - # list of their definitions' indexes in symbol_defs: - known_symbols = {} - for (i, (name, revision)) in enumerate(symbol_defs): - known_symbols.setdefault(name, []).append(i) - - known_symbols = known_symbols.items() - known_symbols.sort() - dup_indexes = set() - for (name, indexes) in known_symbols: - if len(indexes) > 1: - # This symbol was defined multiple times. - self.collect_data.record_fatal_error( - "Multiple definitions of the symbol '%s' in '%s': %s" % ( - name, self.cvs_file.filename, - ' '.join([symbol_defs[i][1] for i in indexes]), - ) - ) - # Ignore all but the last definition for now, to allow the - # conversion to proceed: - dup_indexes.update(indexes[:-1]) - - for (i, symbol_def) in enumerate(symbol_defs): - if i not in dup_indexes: - yield symbol_def - - def _process_symbol(self, name, revision): - """Process a symbol called NAME, which is associated with REVISON. - - REVISION is a canonical revision number with zeros removed, for - example: '1.7', '1.7.2', or '1.1.1' or '1.1.1.1'. NAME is a - transformed branch or tag name.""" - - # Add symbol to our records: - if is_branch_revision_number(revision): - self._add_branch(name, revision) - else: - self._add_tag(name, revision) - - def process_symbols(self): - """Process the symbol definitions from SELF._symbol_defs.""" - - symbol_defs = self._symbol_defs - del self._symbol_defs - - symbol_defs = self._eliminate_trivial_duplicate_defs(symbol_defs) - symbol_defs = self._process_duplicate_defs(symbol_defs) - - for (name, revision) in symbol_defs: - self._defined_symbols.add(name) - self._process_symbol(name, revision) - - @staticmethod - def rev_to_branch_number(revision): - """Return the branch_number of the branch on which REVISION lies. - - REVISION is a branch revision number with an even number of - components; for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2'). - The return value is the branch number (for example, '1.7.2'). - Return none iff REVISION is a trunk revision such as '1.2'.""" - - if is_trunk_revision(revision): - return None - return revision[:revision.rindex(".")] - - def rev_to_branch_data(self, revision): - """Return the branch_data of the branch on which REVISION lies. - - REVISION must be a branch revision number with an even number of - components; for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2'). - Raise KeyError iff REVISION is unknown.""" - - assert not is_trunk_revision(revision) - - return self.branches_data[self.rev_to_branch_number(revision)] - - def rev_to_lod(self, revision): - """Return the line of development on which REVISION lies. - - REVISION must be a revision number with an even number of - components. Raise KeyError iff REVISION is unknown.""" - - if is_trunk_revision(revision): - return self.pdc.trunk - else: - return self.rev_to_branch_data(revision).symbol - - -class _FileDataCollector(cvs2svn_rcsparse.Sink): - """Class responsible for collecting RCS data for a particular file. - - Any collected data that need to be remembered are stored into the - referenced CollectData instance.""" - - def __init__(self, pdc, cvs_file): - """Create an object that is prepared to receive data for CVS_FILE. - CVS_FILE is a CVSFile instance. COLLECT_DATA is used to store the - information collected about the file.""" - - self.pdc = pdc - self.cvs_file = cvs_file - - self.collect_data = self.pdc.collect_data - self.project = self.cvs_file.project - - # A place to store information about the symbols in this file: - self.sdc = _SymbolDataCollector(self, self.cvs_file) - - # { revision : _RevisionData instance } - self._rev_data = { } - - # Lists [ (parent, child) ] of revision number pairs indicating - # that revision child depends on revision parent along the main - # line of development. - self._primary_dependencies = [] - - # If set, this is an RCS branch number -- rcsparse calls this the - # "principal branch", but CVS and RCS refer to it as the "default - # branch", so that's what we call it, even though the rcsparse API - # setter method is still 'set_principal_branch'. - self.default_branch = None - - # True iff revision 1.1 of the file appears to have been imported - # (as opposed to added normally). - self._file_imported = False - - def _get_rev_id(self, revision): - if revision is None: - return None - return self._rev_data[revision].cvs_rev_id - - def set_principal_branch(self, branch): - """This is a callback method declared in Sink.""" - - if branch.find('.') == -1: - # This just sets the default branch to trunk. Normally this - # shouldn't occur, but it has been seen in at least one CVS - # repository. Just ignore it. - pass - else: - self.default_branch = branch - - def set_expansion(self, mode): - """This is a callback method declared in Sink.""" - - self.cvs_file.mode = mode - - def define_tag(self, name, revision): - """Remember the symbol name and revision, but don't process them yet. - - This is a callback method declared in Sink.""" - - self.sdc.define_symbol(name, revision) - - def admin_completed(self): - """This is a callback method declared in Sink.""" - - self.sdc.process_symbols() - - def define_revision(self, revision, timestamp, author, state, - branches, next): - """This is a callback method declared in Sink.""" - - for branch in branches: - try: - branch_data = self.sdc.rev_to_branch_data(branch) - except KeyError: - # Normally we learn about the branches from the branch names - # and numbers parsed from the symbolic name header. But this - # must have been an unlabeled branch that slipped through the - # net. Generate a name for it and create a _BranchData record - # for it now. - branch_data = self.sdc._add_unlabeled_branch( - self.sdc.rev_to_branch_number(branch)) - - assert branch_data.child is None - branch_data.child = branch - - if revision in self._rev_data: - # This revision has already been seen. - Log().error('File %r contains duplicate definitions of revision %s.' - % (self.cvs_file.filename, revision,)) - raise RuntimeError - - # Record basic information about the revision: - rev_data = _RevisionData( - self.collect_data.item_key_generator.gen_id(), - revision, int(timestamp), author, state) - self._rev_data[revision] = rev_data - - # When on trunk, the RCS 'next' revision number points to what - # humans might consider to be the 'previous' revision number. For - # example, 1.3's RCS 'next' is 1.2. - # - # However, on a branch, the RCS 'next' revision number really does - # point to what humans would consider to be the 'next' revision - # number. For example, 1.1.2.1's RCS 'next' would be 1.1.2.2. - # - # In other words, in RCS, 'next' always means "where to find the next - # deltatext that you need this revision to retrieve. - # - # That said, we don't *want* RCS's behavior here, so we determine - # whether we're on trunk or a branch and set the dependencies - # accordingly. - if next: - if is_trunk_revision(revision): - self._primary_dependencies.append( (next, revision,) ) - else: - self._primary_dependencies.append( (revision, next,) ) - - def _resolve_primary_dependencies(self): - """Resolve the dependencies listed in self._primary_dependencies.""" - - for (parent, child,) in self._primary_dependencies: - parent_data = self._rev_data[parent] - assert parent_data.child is None - parent_data.child = child - - child_data = self._rev_data[child] - assert child_data.parent is None - child_data.parent = parent - - def _resolve_branch_dependencies(self): - """Resolve dependencies involving branches.""" - - for branch_data in self.sdc.branches_data.values(): - # The branch_data's parent has the branch as a child regardless - # of whether the branch had any subsequent commits: - try: - parent_data = self._rev_data[branch_data.parent] - except KeyError: - Log().warn( - 'In %r:\n' - ' branch %r references non-existing revision %s\n' - ' and will be ignored.' - % (self.cvs_file.filename, branch_data.symbol.name, - branch_data.parent,)) - del self.sdc.branches_data[branch_data.branch_number] - else: - parent_data.branches_data.append(branch_data) - - # If the branch has a child (i.e., something was committed on - # the branch), then we store a reference to the branch_data - # there, define the child's parent to be the branch's parent, - # and list the child in the branch parent's branches_revs_data: - if branch_data.child is not None: - child_data = self._rev_data[branch_data.child] - assert child_data.parent_branch_data is None - child_data.parent_branch_data = branch_data - assert child_data.parent is None - child_data.parent = branch_data.parent - parent_data.branches_revs_data.append(branch_data.child) - - def _sort_branches(self): - """Sort the branches sprouting from each revision in creation order. - - Creation order is taken to be the reverse of the order that they - are listed in the symbols part of the RCS file. (If a branch is - created then deleted, a later branch can be assigned the recycled - branch number; therefore branch numbers are not an indication of - creation order.)""" - - for rev_data in self._rev_data.values(): - rev_data.branches_data.sort(lambda a, b: - cmp(a.id, b.id)) - - def _resolve_tag_dependencies(self): - """Resolve dependencies involving tags.""" - - for (rev, tag_data_list) in self.sdc.tags_data.items(): - try: - parent_data = self._rev_data[rev] - except KeyError: - Log().warn( - 'In %r:\n' - ' the following tag(s) reference non-existing revision %s\n' - ' and will be ignored:\n' - ' %s' % ( - self.cvs_file.filename, rev, - ', '.join([repr(tag_data.symbol.name) - for tag_data in tag_data_list]),)) - del self.sdc.tags_data[rev] - else: - for tag_data in tag_data_list: - assert tag_data.rev == rev - # The tag_data's rev has the tag as a child: - parent_data.tags_data.append(tag_data) - - def _determine_operation(self, rev_data): - prev_rev_data = self._rev_data.get(rev_data.parent) - return cvs_revision_type_map[( - rev_data.state != 'dead', - prev_rev_data is not None and prev_rev_data.state != 'dead', - )] - - def _get_cvs_revision(self, rev_data): - """Create and return a CVSRevision for REV_DATA.""" - - branch_ids = [ - branch_data.id - for branch_data in rev_data.branches_data - ] - - branch_commit_ids = [ - self._get_rev_id(rev) - for rev in rev_data.branches_revs_data - ] - - tag_ids = [ - tag_data.id - for tag_data in rev_data.tags_data - ] - - revision_type = self._determine_operation(rev_data) - - return revision_type( - self._get_rev_id(rev_data.rev), self.cvs_file, - rev_data.timestamp, None, - self._get_rev_id(rev_data.parent), - self._get_rev_id(rev_data.child), - rev_data.rev, - True, - self.sdc.rev_to_lod(rev_data.rev), - rev_data.get_first_on_branch_id(), - False, None, None, - tag_ids, branch_ids, branch_commit_ids, - rev_data.revision_recorder_token) - - def _get_cvs_revisions(self): - """Generate the CVSRevisions present in this file.""" - - for rev_data in self._rev_data.itervalues(): - yield self._get_cvs_revision(rev_data) - - def _get_cvs_branches(self): - """Generate the CVSBranches present in this file.""" - - for branch_data in self.sdc.branches_data.values(): - yield CVSBranch( - branch_data.id, self.cvs_file, branch_data.symbol, - branch_data.branch_number, - self.sdc.rev_to_lod(branch_data.parent), - self._get_rev_id(branch_data.parent), - self._get_rev_id(branch_data.child), - None, - ) - - def _get_cvs_tags(self): - """Generate the CVSTags present in this file.""" - - for tags_data in self.sdc.tags_data.values(): - for tag_data in tags_data: - yield CVSTag( - tag_data.id, self.cvs_file, tag_data.symbol, - self.sdc.rev_to_lod(tag_data.rev), - self._get_rev_id(tag_data.rev), - None, - ) - - def tree_completed(self): - """The revision tree has been parsed. - - Analyze it for consistency and connect some loose ends. - - This is a callback method declared in Sink.""" - - self._resolve_primary_dependencies() - self._resolve_branch_dependencies() - self._sort_branches() - self._resolve_tag_dependencies() - - # Compute the preliminary CVSFileItems for this file: - cvs_items = [] - cvs_items.extend(self._get_cvs_revisions()) - cvs_items.extend(self._get_cvs_branches()) - cvs_items.extend(self._get_cvs_tags()) - self._cvs_file_items = CVSFileItems( - self.cvs_file, self.pdc.trunk, cvs_items - ) - - self._cvs_file_items.check_link_consistency() - - # Tell the revision recorder about the file dependency tree. - self.collect_data.revision_recorder.start_file(self._cvs_file_items) - - def set_revision_info(self, revision, log, text): - """This is a callback method declared in Sink.""" - - rev_data = self._rev_data[revision] - cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id] - - if cvs_rev.metadata_id is not None: - # Users have reported problems with repositories in which the - # deltatext block for revision 1.1 appears twice. It is not - # known whether this results from a CVS/RCS bug, or from botched - # hand-editing of the repository. In any case, empirically, cvs - # and rcs both use the first version when checking out data, so - # that's what we will do. (For the record: "cvs log" fails on - # such a file; "rlog" prints the log message from the first - # block and ignores the second one.) - Log().warn( - "%s: in '%s':\n" - " Deltatext block for revision %s appeared twice;\n" - " ignoring the second occurrence.\n" - % (warning_prefix, self.cvs_file.filename, revision,) - ) - return - - if is_trunk_revision(revision): - branch_name = None - else: - branch_name = self.sdc.rev_to_branch_data(revision).symbol.name - - cvs_rev.metadata_id = self.collect_data.metadata_logger.store( - self.project, branch_name, rev_data.author, log - ) - cvs_rev.deltatext_exists = bool(text) - - # If this is revision 1.1, determine whether the file appears to - # have been created via 'cvs add' instead of 'cvs import'. The - # test is that the log message CVS uses for 1.1 in imports is - # "Initial revision\n" with no period. (This fact helps determine - # whether this file might have had a default branch in the past.) - if revision == '1.1': - self._file_imported = (log == 'Initial revision\n') - - cvs_rev.revision_recorder_token = \ - self.collect_data.revision_recorder.record_text(cvs_rev, log, text) - - def parse_completed(self): - """Finish the processing of this file. - - This is a callback method declared in Sink.""" - - # Make sure that there was an info section for each revision: - for cvs_item in self._cvs_file_items.values(): - if isinstance(cvs_item, CVSRevision) and cvs_item.metadata_id is None: - self.collect_data.record_fatal_error( - '%r has no deltatext section for revision %s' - % (self.cvs_file.filename, cvs_item.rev,) - ) - - def _process_ntdbrs(self): - """Fix up any non-trunk default branch revisions (if present). - - If a non-trunk default branch is determined to have existed, yield - the _RevisionData.ids for all revisions that were once non-trunk - default revisions, in dependency order. - - There are two cases to handle: - - One case is simple. The RCS file lists a default branch - explicitly in its header, such as '1.1.1'. In this case, we know - that every revision on the vendor branch is to be treated as head - of trunk at that point in time. - - But there's also a degenerate case. The RCS file does not - currently have a default branch, yet we can deduce that for some - period in the past it probably *did* have one. For example, the - file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are - dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated - after 1.2. In this case, we should record 1.1.1.96 as the last - vendor revision to have been the head of the default branch. - - If any non-trunk default branch revisions are found: - - - Set their ntdbr members to True. - - - Connect the last one with revision 1.2. - - - Remove revision 1.1 if it is not needed. - - """ - - try: - if self.default_branch: - vendor_cvs_branch_id = self.sdc.branches_data[self.default_branch].id - vendor_lod_items = self._cvs_file_items.get_lod_items( - self._cvs_file_items[vendor_cvs_branch_id] - ) - if not self._cvs_file_items.process_live_ntdb(vendor_lod_items): - return - elif self._file_imported: - vendor_branch_data = self.sdc.branches_data.get('1.1.1') - if vendor_branch_data is None: - return - else: - vendor_lod_items = self._cvs_file_items.get_lod_items( - self._cvs_file_items[vendor_branch_data.id] - ) - if not self._cvs_file_items.process_historical_ntdb( - vendor_lod_items - ): - return - else: - return - except VendorBranchError, e: - self.collect_data.record_fatal_error(str(e)) - return - - if self._file_imported: - self._cvs_file_items.imported_remove_1_1(vendor_lod_items) - - self._cvs_file_items.check_link_consistency() - - def get_cvs_file_items(self): - """Finish up and return a CVSFileItems instance for this file. - - This method must only be called once.""" - - self._process_ntdbrs() - - # Break a circular reference loop, allowing the memory for self - # and sdc to be freed. - del self.sdc - - return self._cvs_file_items - - -class _ProjectDataCollector: - def __init__(self, collect_data, project): - self.collect_data = collect_data - self.project = project - self.num_files = 0 - - # The Trunk LineOfDevelopment object for this project: - self.trunk = Trunk( - self.collect_data.symbol_key_generator.gen_id(), self.project - ) - self.project.trunk_id = self.trunk.id - - # This causes a record for self.trunk to spring into existence: - self.collect_data.symbol_stats[self.trunk] - - # A map { name -> Symbol } for all known symbols in this project. - # The symbols listed here are undifferentiated into Branches and - # Tags because the same name might appear as a branch in one file - # and a tag in another. - self.symbols = {} - - # A map { (old_name, new_name) : count } indicating how many files - # were affected by each each symbol name transformation: - self.symbol_transform_counts = {} - - def get_symbol(self, name): - """Return the Symbol object for the symbol named NAME in this project. - - If such a symbol does not yet exist, allocate a new symbol_id, - create a Symbol instance, store it in self.symbols, and return it.""" - - symbol = self.symbols.get(name) - if symbol is None: - symbol = Symbol( - self.collect_data.symbol_key_generator.gen_id(), - self.project, name) - self.symbols[name] = symbol - return symbol - - def log_symbol_transform(self, old_name, new_name): - """Record that OLD_NAME was transformed to NEW_NAME in one file. - - This information is used to generated a statistical summary of - symbol transforms.""" - - try: - self.symbol_transform_counts[old_name, new_name] += 1 - except KeyError: - self.symbol_transform_counts[old_name, new_name] = 1 - - def summarize_symbol_transforms(self): - if self.symbol_transform_counts and Log().is_on(Log.NORMAL): - log = Log() - log.normal('Summary of symbol transforms:') - transforms = self.symbol_transform_counts.items() - transforms.sort() - for ((old_name, new_name), count) in transforms: - if new_name is None: - log.normal(' "%s" ignored in %d files' % (old_name, count,)) - else: - log.normal( - ' "%s" transformed to "%s" in %d files' - % (old_name, new_name, count,) - ) - - def _process_cvs_file_items(self, cvs_file_items): - """Process the CVSFileItems from one CVSFile.""" - - # Remove CVSRevisionDeletes that are not needed: - cvs_file_items.remove_unneeded_deletes(self.collect_data.metadata_db) - - # Remove initial branch deletes that are not needed: - cvs_file_items.remove_initial_branch_deletes( - self.collect_data.metadata_db - ) - - # If this is a --trunk-only conversion, discard all branches and - # tags, then draft any non-trunk default branch revisions to - # trunk: - if Ctx().trunk_only: - cvs_file_items.exclude_non_trunk() - - self.collect_data.revision_recorder.finish_file(cvs_file_items) - self.collect_data.add_cvs_file_items(cvs_file_items) - self.collect_data.symbol_stats.register(cvs_file_items) - - def process_file(self, cvs_file): - Log().normal(cvs_file.filename) - fdc = _FileDataCollector(self, cvs_file) - try: - cvs2svn_rcsparse.parse(open(cvs_file.filename, 'rb'), fdc) - except (cvs2svn_rcsparse.common.RCSParseError, ValueError, RuntimeError): - self.collect_data.record_fatal_error( - "%r is not a valid ,v file" % (cvs_file.filename,) - ) - # Abort the processing of this file, but let the pass continue - # with other files: - return - except: - Log().warn("Exception occurred while parsing %s" % cvs_file.filename) - raise - else: - self.num_files += 1 - - cvs_file_items = fdc.get_cvs_file_items() - - del fdc - - self._process_cvs_file_items(cvs_file_items) - - -class CollectData: - """Repository for data collected by parsing the CVS repository files. - - This class manages the databases into which information collected - from the CVS repository is stored. The data are stored into this - class by _FileDataCollector instances, one of which is created for - each file to be parsed.""" - - def __init__(self, revision_recorder, stats_keeper): - self.revision_recorder = revision_recorder - self._cvs_item_store = NewCVSItemStore( - artifact_manager.get_temp_file(config.CVS_ITEMS_STORE)) - self.metadata_db = MetadataDatabase( - artifact_manager.get_temp_file(config.METADATA_STORE), - artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE), - DB_OPEN_NEW, - ) - self.metadata_logger = MetadataLogger(self.metadata_db) - self.fatal_errors = [] - self.num_files = 0 - self.symbol_stats = SymbolStatisticsCollector() - self.stats_keeper = stats_keeper - - # Key generator for CVSFiles: - self.file_key_generator = KeyGenerator() - - # Key generator for CVSItems: - self.item_key_generator = KeyGenerator() - - # Key generator for Symbols: - self.symbol_key_generator = KeyGenerator() - - self.revision_recorder.start() - - def record_fatal_error(self, err): - """Record that fatal error ERR was found. - - ERR is a string (without trailing newline) describing the error. - Output the error to stderr immediately, and record a copy to be - output again in a summary at the end of CollectRevsPass.""" - - err = '%s: %s' % (error_prefix, err,) - Log().error(err + '\n') - self.fatal_errors.append(err) - - def add_cvs_directory(self, cvs_directory): - """Record CVS_DIRECTORY.""" - - Ctx()._cvs_file_db.log_file(cvs_directory) - - def add_cvs_file_items(self, cvs_file_items): - """Record the information from CVS_FILE_ITEMS. - - Store the CVSFile to _cvs_file_db under its persistent id, store - the CVSItems, and record the CVSItems to self.stats_keeper.""" - - Ctx()._cvs_file_db.log_file(cvs_file_items.cvs_file) - self._cvs_item_store.add(cvs_file_items) - - self.stats_keeper.record_cvs_file(cvs_file_items.cvs_file) - for cvs_item in cvs_file_items.values(): - self.stats_keeper.record_cvs_item(cvs_item) - - def _get_cvs_file( - self, parent_directory, basename, file_in_attic, leave_in_attic=False - ): - """Return a CVSFile describing the file with name BASENAME. - - PARENT_DIRECTORY is the CVSDirectory instance describing the - directory that physically holds this file in the filesystem. - BASENAME must be the base name of a *,v file within - PARENT_DIRECTORY. - - FILE_IN_ATTIC is a boolean telling whether the specified file is - in an Attic subdirectory. If FILE_IN_ATTIC is True, then: - - - If LEAVE_IN_ATTIC is True, then leave the 'Attic' component in - the filename. - - - Otherwise, raise FileInAndOutOfAtticException if a file with the - same filename appears outside of Attic. - - The CVSFile is assigned a new unique id. All of the CVSFile - information is filled in except mode (which can only be determined - by parsing the file). - - Raise FatalError if the resulting filename would not be legal in - SVN.""" - - filename = os.path.join(parent_directory.filename, basename) - try: - verify_svn_filename_legal(basename[:-2]) - except IllegalSVNPathError, e: - raise FatalError( - 'File %r would result in an illegal SVN filename: %s' - % (filename, e,) - ) - - if file_in_attic and not leave_in_attic: - in_attic = True - logical_parent_directory = parent_directory.parent_directory - - # If this file also exists outside of the attic, it's a fatal - # error: - non_attic_filename = os.path.join( - logical_parent_directory.filename, basename, - ) - if os.path.exists(non_attic_filename): - raise FileInAndOutOfAtticException(non_attic_filename, filename) - else: - in_attic = False - logical_parent_directory = parent_directory - - file_stat = os.stat(filename) - - # The size of the file in bytes: - file_size = file_stat[stat.ST_SIZE] - - # Whether or not the executable bit is set: - file_executable = bool(file_stat[0] & stat.S_IXUSR) - - # mode is not known, so we temporarily set it to None. - return CVSFile( - self.file_key_generator.gen_id(), - parent_directory.project, logical_parent_directory, basename[:-2], - in_attic, file_executable, file_size, None - ) - - def _get_attic_file(self, parent_directory, basename): - """Return a CVSFile object for the Attic file at BASENAME. - - PARENT_DIRECTORY is the CVSDirectory that physically contains the - file on the filesystem (i.e., the Attic directory). It is not - necessarily the parent_directory of the CVSFile that will be - returned. - - Return CVSFile, whose parent directory is usually - PARENT_DIRECTORY.parent_directory, but might be PARENT_DIRECTORY - iff CVSFile will remain in the Attic directory.""" - - try: - return self._get_cvs_file(parent_directory, basename, True) - except FileInAndOutOfAtticException, e: - if Ctx().retain_conflicting_attic_files: - Log().warn( - "%s: %s;\n" - " storing the latter into 'Attic' subdirectory.\n" - % (warning_prefix, e) - ) - else: - self.record_fatal_error(str(e)) - - # Either way, return a CVSFile object so that the rest of the - # file processing can proceed: - return self._get_cvs_file( - parent_directory, basename, True, leave_in_attic=True - ) - - def _generate_attic_cvs_files(self, cvs_directory): - """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY. - - Also add CVS_DIRECTORY to self if any files are being retained in - that directory.""" - - retained_attic_file = False - - fnames = os.listdir(cvs_directory.filename) - fnames.sort() - for fname in fnames: - pathname = os.path.join(cvs_directory.filename, fname) - if os.path.isdir(pathname): - Log().warn("Directory %s found within Attic; ignoring" % (pathname,)) - elif fname.endswith(',v'): - cvs_file = self._get_attic_file(cvs_directory, fname) - if cvs_file.parent_directory == cvs_directory: - # This file will be retained in the Attic directory. - retained_attic_file = True - yield cvs_file - - if retained_attic_file: - # If any files were retained in the Attic directory, then write - # the Attic directory to CVSFileDatabase: - self.add_cvs_directory(cvs_directory) - - def _get_non_attic_file(self, parent_directory, basename): - """Return a CVSFile object for the non-Attic file at BASENAME.""" - - return self._get_cvs_file(parent_directory, basename, False) - - def _generate_cvs_files(self, cvs_directory): - """Generate the CVSFiles under non-Attic directory CVS_DIRECTORY. - - Process directories recursively, including Attic directories. - Also create and register CVSDirectories as they are found, and - look for conflicts between the filenames that will result from - files, attic files, and subdirectories.""" - - self.add_cvs_directory(cvs_directory) - - # Map {cvs_file.basename : cvs_file.filename} for files directly - # in cvs_directory: - rcsfiles = {} - - attic_dir = None - - # Non-Attic subdirectories of cvs_directory (to be recursed into): - dirs = [] - - fnames = os.listdir(cvs_directory.filename) - fnames.sort() - for fname in fnames: - pathname = os.path.join(cvs_directory.filename, fname) - if os.path.isdir(pathname): - if fname == 'Attic': - attic_dir = fname - else: - dirs.append(fname) - elif fname.endswith(',v'): - cvs_file = self._get_non_attic_file(cvs_directory, fname) - rcsfiles[cvs_file.basename] = cvs_file.filename - yield cvs_file - else: - # Silently ignore other files: - pass - - # Map {cvs_file.basename : cvs_file.filename} for files in an - # Attic directory within cvs_directory: - attic_rcsfiles = {} - - if attic_dir is not None: - attic_directory = CVSDirectory( - self.file_key_generator.gen_id(), - cvs_directory.project, cvs_directory, 'Attic', - ) - - for cvs_file in self._generate_attic_cvs_files(attic_directory): - if cvs_file.parent_directory == cvs_directory: - attic_rcsfiles[cvs_file.basename] = cvs_file.filename - yield cvs_file - - alldirs = dirs + [attic_dir] - else: - alldirs = dirs - - # Check for conflicts between directory names and the filenames - # that will result from the rcs files (both in this directory and - # in attic). (We recurse into the subdirectories nevertheless, to - # try to detect more problems.) - for fname in alldirs: - pathname = os.path.join(cvs_directory.filename, fname) - for rcsfile_list in [rcsfiles, attic_rcsfiles]: - if fname in rcsfile_list: - self.record_fatal_error( - 'Directory name conflicts with filename. Please remove or ' - 'rename one\n' - 'of the following:\n' - ' "%s"\n' - ' "%s"' - % (pathname, rcsfile_list[fname],) - ) - - # Now recurse into the other subdirectories: - for fname in dirs: - dirname = os.path.join(cvs_directory.filename, fname) - - # Verify that the directory name does not contain any illegal - # characters: - try: - verify_svn_filename_legal(fname) - except IllegalSVNPathError, e: - raise FatalError( - 'Directory %r would result in an illegal SVN path name: %s' - % (dirname, e,) - ) - - sub_directory = CVSDirectory( - self.file_key_generator.gen_id(), - cvs_directory.project, cvs_directory, fname, - ) - - for cvs_file in self._generate_cvs_files(sub_directory): - yield cvs_file - - def process_project(self, project): - Ctx()._projects[project.id] = project - - root_cvs_directory = CVSDirectory( - self.file_key_generator.gen_id(), project, None, '' - ) - project.root_cvs_directory_id = root_cvs_directory.id - pdc = _ProjectDataCollector(self, project) - - found_rcs_file = False - for cvs_file in self._generate_cvs_files(root_cvs_directory): - pdc.process_file(cvs_file) - found_rcs_file = True - - if not found_rcs_file: - self.record_fatal_error( - 'No RCS files found under %r!\n' - 'Are you absolutely certain you are pointing cvs2svn\n' - 'at a CVS repository?\n' - % (project.project_cvs_repos_path,) - ) - - pdc.summarize_symbol_transforms() - - self.num_files += pdc.num_files - Log().verbose('Processed', self.num_files, 'files') - - def _set_cvs_path_ordinals(self): - cvs_files = list(Ctx()._cvs_file_db.itervalues()) - cvs_files.sort(CVSPath.slow_compare) - for (i, cvs_file) in enumerate(cvs_files): - cvs_file.ordinal = i - - def close(self): - """Close the data structures associated with this instance. - - Return a list of fatal errors encountered while processing input. - Each list entry is a string describing one fatal error.""" - - self.revision_recorder.finish() - self.symbol_stats.purge_ghost_symbols() - self.symbol_stats.close() - self.symbol_stats = None - self.metadata_logger = None - self.metadata_db.close() - self.metadata_db = None - self._cvs_item_store.close() - self._cvs_item_store = None - self._set_cvs_path_ordinals() - self.revision_recorder = None - retval = self.fatal_errors - self.fatal_errors = None - return retval - - diff --git a/cvs2svn_lib/common.py b/cvs2svn_lib/common.py deleted file mode 100644 index 8400907..0000000 --- a/cvs2svn_lib/common.py +++ /dev/null @@ -1,409 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains common facilities used by cvs2svn.""" - - -import re -import time -import codecs - -from cvs2svn_lib.log import Log - - -# Always use these constants for opening databases. -DB_OPEN_READ = 'r' -DB_OPEN_WRITE = 'w' -DB_OPEN_NEW = 'n' - - -SVN_INVALID_REVNUM = -1 - - -# Warnings and errors start with these strings. They are typically -# followed by a colon and a space, as in "%s: " ==> "WARNING: ". -warning_prefix = "WARNING" -error_prefix = "ERROR" - - -class FatalException(Exception): - """Exception thrown on a non-recoverable error. - - If this exception is thrown by main(), it is caught by the global - layer of the program, its string representation is printed (followed - by a newline), and the program is ended with an exit code of 1.""" - - pass - - -class InternalError(Exception): - """Exception thrown in the case of a cvs2svn internal error (aka, bug).""" - - pass - - -class FatalError(FatalException): - """A FatalException that prepends error_prefix to the message.""" - - def __init__(self, msg): - """Use (error_prefix + ': ' + MSG) as the error message.""" - - FatalException.__init__(self, '%s: %s' % (error_prefix, msg,)) - - -class CommandError(FatalError): - """A FatalError caused by a failed command invocation. - - The error message includes the command name, exit code, and output.""" - - def __init__(self, command, exit_status, error_output=''): - self.command = command - self.exit_status = exit_status - self.error_output = error_output - if error_output.rstrip(): - FatalError.__init__( - self, - 'The command %r failed with exit status=%s\n' - 'and the following output:\n' - '%s' - % (self.command, self.exit_status, self.error_output.rstrip())) - else: - FatalError.__init__( - self, - 'The command %r failed with exit status=%s and no output' - % (self.command, self.exit_status)) - - -def path_join(*components): - """Join two or more pathname COMPONENTS, inserting '/' as needed. - Empty component are skipped.""" - - return '/'.join(filter(None, components)) - - -def path_split(path): - """Split the svn pathname PATH into a pair, (HEAD, TAIL). - - This is similar to os.path.split(), but always uses '/' as path - separator. PATH is an svn path, which should not start with a '/'. - HEAD is everything before the last slash, and TAIL is everything - after. If PATH ends in a slash, TAIL will be empty. If there is no - slash in PATH, HEAD will be empty. If PATH is empty, both HEAD and - TAIL are empty.""" - - pos = path.rfind('/') - if pos == -1: - return ('', path,) - else: - return (path[:pos], path[pos+1:],) - - -class IllegalSVNPathError(FatalException): - pass - - -# Control characters (characters not allowed in Subversion filenames): -ctrl_characters_regexp = re.compile('[\\\x00-\\\x1f\\\x7f]') - - -def verify_svn_filename_legal(filename): - """Verify that FILENAME is a legal filename. - - FILENAME is a path component of a CVS path. Check that it won't - choke SVN: - - - Check that it is not empty. - - - Check that it is not equal to '.' or '..'. - - - Check that the filename does not include any control characters. - - If any of these tests fail, raise an IllegalSVNPathError.""" - - if filename == '': - raise IllegalSVNPathError("Empty filename component.") - - if filename in ['.', '..']: - raise IllegalSVNPathError("Illegal filename component %r." % (filename,)) - - m = ctrl_characters_regexp.search(filename) - if m: - raise IllegalSVNPathError( - "Character %r in filename %r is not supported by Subversion." - % (m.group(), filename,) - ) - - -def verify_svn_path_legal(path): - """Verify that PATH is a legitimate SVN path. - - If not, raise an IllegalSVNPathError.""" - - if path.startswith('/'): - raise IllegalSVNPathError("Path %r must not start with '/'." % (path,)) - head = path - while head != '': - (head,tail) = path_split(head) - try: - verify_svn_filename_legal(tail) - except IllegalSVNPathError, e: - raise IllegalSVNPathError('Problem with path %r: %s' % (path, e,)) - - -def normalize_svn_path(path, allow_empty=False): - """Normalize an SVN path (e.g., one supplied by a user). - - 1. Strip leading, trailing, and duplicated '/'. - 2. If ALLOW_EMPTY is not set, verify that PATH is not empty. - - Return the normalized path. - - If the path is invalid, raise an IllegalSVNPathError.""" - - norm_path = path_join(*path.split('/')) - if not allow_empty and not norm_path: - raise IllegalSVNPathError("Path is empty") - return norm_path - - -class PathRepeatedException(Exception): - def __init__(self, path, count): - self.path = path - self.count = count - Exception.__init__( - self, 'Path %s is repeated %d times' % (self.path, self.count,) - ) - - -class PathsNestedException(Exception): - def __init__(self, nest, nestlings): - self.nest = nest - self.nestlings = nestlings - Exception.__init__( - self, - 'Path %s contains the following other paths: %s' - % (self.nest, ', '.join(self.nestlings),) - ) - - -class PathsNotDisjointException(FatalException): - """An exception that collects multiple other disjointness exceptions.""" - - def __init__(self, problems): - self.problems = problems - Exception.__init__( - self, - 'The following paths are not disjoint:\n' - ' %s\n' - % ('\n '.join([str(problem) for problem in self.problems]),) - ) - - -def verify_paths_disjoint(*paths): - """Verify that all of the paths in the argument list are disjoint. - - If any of the paths is nested in another one (i.e., in the sense - that 'a/b/c/d' is nested in 'a/b'), or any two paths are identical, - raise a PathsNotDisjointException containing exceptions detailing - the individual problems.""" - - def split(path): - if not path: - return [] - else: - return path.split('/') - - def contains(split_path1, split_path2): - """Return True iff SPLIT_PATH1 contains SPLIT_PATH2.""" - - return ( - len(split_path1) < len(split_path2) - and split_path2[:len(split_path1)] == split_path1 - ) - - paths = [(split(path), path) for path in paths] - # If all overlapping elements are equal, a shorter list is - # considered "less than" a longer one. Therefore if any paths are - # nested, this sort will leave at least one such pair adjacent, in - # the order [nest,nestling]. - paths.sort() - - problems = [] - - # Create exceptions for any repeated paths, and delete the repeats - # from the paths array: - i = 0 - while i < len(paths): - split_path, path = paths[i] - j = i + 1 - while j < len(paths) and split_path == paths[j][0]: - j += 1 - if j - i > 1: - problems.append(PathRepeatedException(path, j - i)) - # Delete all but the first copy: - del paths[i + 1:j] - i += 1 - - # Create exceptions for paths nested in each other: - i = 0 - while i < len(paths): - split_path, path = paths[i] - j = i + 1 - while j < len(paths) and contains(split_path, paths[j][0]): - j += 1 - if j - i > 1: - problems.append(PathsNestedException( - path, [path2 for (split_path2, path2) in paths[i + 1:j]] - )) - i += 1 - - if problems: - raise PathsNotDisjointException(problems) - - -def format_date(date): - """Return an svn-compatible date string for DATE (seconds since epoch). - - A Subversion date looks like '2002-09-29T14:44:59.000000Z'.""" - - return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", time.gmtime(date)) - - -class CVSTextDecoder: - """Callable that decodes CVS strings into Unicode.""" - - def __init__(self, encodings, fallback_encoding=None): - """Create a CVSTextDecoder instance. - - ENCODINGS is a list containing the names of encodings that are - attempted to be used as source encodings in 'strict' mode. - - FALLBACK_ENCODING, if specified, is the name of an encoding that - should be used as a source encoding in lossy 'replace' mode if all - of ENCODINGS failed. - - Raise LookupError if any of the specified encodings is unknown.""" - - self.decoders = [ - (encoding, codecs.lookup(encoding)[1]) - for encoding in encodings] - - if fallback_encoding is None: - self.fallback_decoder = None - else: - self.fallback_decoder = ( - fallback_encoding, codecs.lookup(fallback_encoding)[1] - ) - - def add_encoding(self, encoding): - """Add an encoding to be tried in 'strict' mode. - - ENCODING is the name of an encoding. If it is unknown, raise a - LookupError.""" - - for (name, decoder) in self.decoders: - if name == encoding: - return - else: - self.decoders.append( (encoding, codecs.lookup(encoding)[1]) ) - - def set_fallback_encoding(self, encoding): - """Set the fallback encoding, to be tried in 'replace' mode. - - ENCODING is the name of an encoding. If it is unknown, raise a - LookupError.""" - - if encoding is None: - self.fallback_decoder = None - else: - self.fallback_decoder = (encoding, codecs.lookup(encoding)[1]) - - def __call__(self, s): - """Try to decode string S using our configured source encodings. - - Return the string as a Unicode string. If S is already a unicode - string, do nothing. - - Raise UnicodeError if the string cannot be decoded using any of - the source encodings and no fallback encoding was specified.""" - - if isinstance(s, unicode): - return s - for (name, decoder) in self.decoders: - try: - return decoder(s)[0] - except ValueError: - Log().verbose("Encoding '%s' failed for string %r" % (name, s)) - - if self.fallback_decoder is not None: - (name, decoder) = self.fallback_decoder - return decoder(s, 'replace')[0] - else: - raise UnicodeError - - -class Timestamper: - """Return monotonic timestamps derived from changeset timestamps.""" - - def __init__(self): - # The last timestamp that has been returned: - self.timestamp = 0.0 - - # The maximum timestamp that is considered reasonable: - self.max_timestamp = time.time() + 24.0 * 60.0 * 60.0 - - def get(self, timestamp, change_expected): - """Return a reasonable timestamp derived from TIMESTAMP. - - Push TIMESTAMP into the future if necessary to ensure that it is - at least one second later than every other timestamp that has been - returned by previous calls to this method. - - If CHANGE_EXPECTED is not True, then log a message if the - timestamp has to be changed.""" - - if timestamp > self.max_timestamp: - # If a timestamp is in the future, it is assumed that it is - # bogus. Shift it backwards in time to prevent it forcing other - # timestamps to be pushed even further in the future. - - # Note that this is not nearly a complete solution to the bogus - # timestamp problem. A timestamp in the future still affects - # the ordering of changesets, and a changeset having such a - # timestamp will not be committed until all changesets with - # earlier timestamps have been committed, even if other - # changesets with even earlier timestamps depend on this one. - self.timestamp = self.timestamp + 1.0 - if not change_expected: - Log().warn( - 'Timestamp "%s" is in the future; changed to "%s".' - % (time.asctime(time.gmtime(timestamp)), - time.asctime(time.gmtime(self.timestamp)),) - ) - elif timestamp < self.timestamp + 1.0: - self.timestamp = self.timestamp + 1.0 - if not change_expected and Log().is_on(Log.VERBOSE): - Log().verbose( - 'Timestamp "%s" adjusted to "%s" to ensure monotonicity.' - % (time.asctime(time.gmtime(timestamp)), - time.asctime(time.gmtime(self.timestamp)),) - ) - else: - self.timestamp = timestamp - - return self.timestamp - - diff --git a/cvs2svn_lib/config.py b/cvs2svn_lib/config.py deleted file mode 100644 index b313b2c..0000000 --- a/cvs2svn_lib/config.py +++ /dev/null @@ -1,221 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains various configuration constants used by cvs2svn.""" - - -SVN_KEYWORDS_VALUE = 'Author Date Id Revision' - -# The default names for the trunk/branches/tags directory for each -# project: -DEFAULT_TRUNK_BASE = 'trunk' -DEFAULT_BRANCHES_BASE = 'branches' -DEFAULT_TAGS_BASE = 'tags' - -SVNADMIN_EXECUTABLE = 'svnadmin' -CO_EXECUTABLE = 'co' -CVS_EXECUTABLE = 'cvs' -SORT_EXECUTABLE = 'sort' - -# A pickled list of the projects defined for this conversion. -PROJECTS = 'projects.pck' - -# A file holding the Serializer to be used for -# CVS_REVS_SUMMARY_*_DATAFILE and CVS_SYMBOLS_SYMMARY_*_DATAFILE: -SUMMARY_SERIALIZER = 'summary-serializer.pck' - -# The first file contains enough information about each CVSRevision to -# deduce preliminary Changesets. The second file is a sorted version -# of the first. -CVS_REVS_SUMMARY_DATAFILE = 'revs-summary.txt' -CVS_REVS_SUMMARY_SORTED_DATAFILE = 'revs-summary-s.txt' - -# The first file contains enough information about each CVSSymbol to -# deduce preliminary Changesets. The second file is a sorted version -# of the first. -CVS_SYMBOLS_SUMMARY_DATAFILE = 'symbols-summary.txt' -CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE = 'symbols-summary-s.txt' - -# A mapping from CVSItem id to Changeset id. -CVS_ITEM_TO_CHANGESET = 'cvs-item-to-changeset.dat' - -# A mapping from CVSItem id to Changeset id, after the -# RevisionChangeset loops have been broken. -CVS_ITEM_TO_CHANGESET_REVBROKEN = 'cvs-item-to-changeset-revbroken.dat' - -# A mapping from CVSItem id to Changeset id, after the SymbolChangeset -# loops have been broken. -CVS_ITEM_TO_CHANGESET_SYMBROKEN = 'cvs-item-to-changeset-symbroken.dat' - -# A mapping from CVSItem id to Changeset id, after all Changeset -# loops have been broken. -CVS_ITEM_TO_CHANGESET_ALLBROKEN = 'cvs-item-to-changeset-allbroken.dat' - -# A mapping from id to Changeset. -CHANGESETS_INDEX = 'changesets-index.dat' -CHANGESETS_STORE = 'changesets.pck' - -# A mapping from id to Changeset, after the RevisionChangeset loops -# have been broken. -CHANGESETS_REVBROKEN_INDEX = 'changesets-revbroken-index.dat' -CHANGESETS_REVBROKEN_STORE = 'changesets-revbroken.pck' - -# A mapping from id to Changeset, after the RevisionChangesets have -# been sorted and converted into OrderedChangesets. -CHANGESETS_REVSORTED_INDEX = 'changesets-revsorted-index.dat' -CHANGESETS_REVSORTED_STORE = 'changesets-revsorted.pck' - -# A mapping from id to Changeset, after the SymbolChangeset loops have -# been broken. -CHANGESETS_SYMBROKEN_INDEX = 'changesets-symbroken-index.dat' -CHANGESETS_SYMBROKEN_STORE = 'changesets-symbroken.pck' - -# A mapping from id to Changeset, after all Changeset loops have been -# broken. -CHANGESETS_ALLBROKEN_INDEX = 'changesets-allbroken-index.dat' -CHANGESETS_ALLBROKEN_STORE = 'changesets-allbroken.pck' - -# The RevisionChangesets in commit order. Each line contains the -# changeset id and timestamp of one changeset, in hexadecimal, in the -# order that the changesets should be committed to svn. -CHANGESETS_SORTED_DATAFILE = 'changesets-s.txt' - -# A file containing a marshalled copy of all the statistics that have -# been gathered so far is written at the end of each pass as a -# marshalled dictionary. This is the pattern used to generate the -# filenames. -STATISTICS_FILE = 'statistics-%02d.pck' - -# This text file contains records (1 per line) that describe openings -# and closings for copies to tags and branches. The format is as -# follows: -# -# SYMBOL_ID SVN_REVNUM TYPE CVS_SYMBOL_ID -# -# where type is either OPENING or CLOSING. CVS_SYMBOL_ID is the id of -# the CVSSymbol whose opening or closing is being described (in hex). -SYMBOL_OPENINGS_CLOSINGS = 'symbolic-names.txt' -# A sorted version of the above file. SYMBOL_ID and SVN_REVNUM are -# the primary and secondary sorting criteria. It is important that -# SYMBOL_IDs be located together to make it quick to read them at -# once. The order of SVN_REVNUM is only important because it is -# assumed by some internal consistency checks. -SYMBOL_OPENINGS_CLOSINGS_SORTED = 'symbolic-names-s.txt' - -# Skeleton version of the repository filesystem. See class -# RepositoryMirror for how these work. -MIRROR_NODES_INDEX_TABLE = 'mirror-nodes-index.dat' -MIRROR_NODES_STORE = 'mirror-nodes.pck' - -# Offsets pointing to the beginning of each symbol's records in -# SYMBOL_OPENINGS_CLOSINGS_SORTED. This file contains a pickled map -# from symbol_id to file offset. -SYMBOL_OFFSETS_DB = 'symbol-offsets.pck' - -# Pickled map of CVSFile.id to instance. -CVS_FILES_DB = 'cvs-files.pck' - -# A series of records. The first is a pickled serializer. Each -# subsequent record is a serialized list of all CVSItems applying to a -# CVSFile. -CVS_ITEMS_STORE = 'cvs-items.pck' - -# The same as above, but with the CVSItems ordered in groups based on -# their initial changesets. CVSItems will usually be accessed one -# changeset at a time, so this ordering helps disk locality (even -# though some of the changesets will later be broken up). -CVS_ITEMS_SORTED_INDEX_TABLE = 'cvs-items-sorted-index.dat' -CVS_ITEMS_SORTED_STORE = 'cvs-items-sorted.pck' - -# A record of all symbolic names that will be processed in the -# conversion. This file contains a pickled list of TypedSymbol -# objects. -SYMBOL_DB = 'symbols.pck' - -# A pickled list of the statistics for all symbols. Each entry in the -# list is an instance of cvs2svn_lib.symbol_statistics._Stats. -SYMBOL_STATISTICS = 'symbol-statistics.pck' - -# These two databases provide a bidirectional mapping between -# CVSRevision.ids (in hex) and Subversion revision numbers. -# -# The first maps CVSRevision.id to the SVN revision number of which it -# is a part (more than one CVSRevision can map to the same SVN -# revision number). -# -# The second maps Subversion revision numbers (as hex strings) to -# pickled SVNCommit instances. -CVS_REVS_TO_SVN_REVNUMS = 'cvs-revs-to-svn-revnums.dat' - -# This database maps Subversion revision numbers to pickled SVNCommit -# instances. -SVN_COMMITS_INDEX_TABLE = 'svn-commits-index.dat' -SVN_COMMITS_STORE = 'svn-commits.pck' - -# How many bytes to read at a time from a pipe. 128 kiB should be -# large enough to be efficient without wasting too much memory. -PIPE_READ_SIZE = 128 * 1024 - -# Records the author and log message for each changeset. The database -# contains a map metadata_id -> (author, logmessage). Each -# CVSRevision that is eligible to be combined into the same SVN commit -# is assigned the same id. Note that the (author, logmessage) pairs -# are not necessarily all distinct; other data are taken into account -# when constructing ids. -METADATA_INDEX_TABLE = 'metadata-index.dat' -METADATA_STORE = 'metadata.pck' - -# The same, after it has been cleaned up for the chosen output option: -METADATA_CLEAN_INDEX_TABLE = 'metadata-clean-index.dat' -METADATA_CLEAN_STORE = 'metadata-clean.pck' - -# The following four databases are used in conjunction with --use-internal-co. - -# Records the RCS deltas for all CVS revisions. The deltas are to be -# applied forward, i.e. those from trunk are reversed wrt RCS. -RCS_DELTAS_INDEX_TABLE = 'rcs-deltas-index.dat' -RCS_DELTAS_STORE = 'rcs-deltas.pck' - -# Records the revision tree of each RCS file. The format is a list of -# list of integers. The outer list holds lines of development, the inner list -# revisions within the LODs, revisions are CVSItem ids. Branches "closer -# to the trunk" appear later. Revisions are sorted by reverse chronological -# order. The last revision of each branch is the revision it sprouts from. -# Revisions that represent deletions at the end of a branch are omitted. -RCS_TREES_INDEX_TABLE = 'rcs-trees-index.dat' -RCS_TREES_STORE = 'rcs-trees.pck' - -# Records the revision tree of each RCS file after removing revisions -# belonging to excluded branches. Note that the branch ordering is arbitrary -# in this file. -RCS_TREES_FILTERED_INDEX_TABLE = 'rcs-trees-filtered-index.dat' -RCS_TREES_FILTERED_STORE = 'rcs-trees-filtered.pck' - -# At any given time during OutputPass, holds the full text of each CVS -# revision that was checked out already and still has descendants that will -# be checked out. -CVS_CHECKOUT_DB = 'cvs-checkout.db' - -# End of DBs related to --use-internal-co. - -# If this run will output directly to a Subversion repository, then -# this is the name of the file that each revision will temporarily be -# written to prior to writing it into the repository. -DUMPFILE = 'svn.dump' - -# flush a commit if a 5 minute gap occurs. -COMMIT_THRESHOLD = 5 * 60 - diff --git a/cvs2svn_lib/context.py b/cvs2svn_lib/context.py deleted file mode 100644 index 89dc16a..0000000 --- a/cvs2svn_lib/context.py +++ /dev/null @@ -1,93 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Store the context (options, etc) for a cvs2svn run.""" - - -import os - -from cvs2svn_lib import config -from cvs2svn_lib.common import CVSTextDecoder - - -class Ctx: - """Session state for this run of cvs2svn. For example, run-time - options are stored here. This class is a Borg (see - http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531).""" - - __shared_state = { } - - def __init__(self): - self.__dict__ = self.__shared_state - if self.__dict__: - return - # Else, initialize to defaults. - self.set_defaults() - - def set_defaults(self): - """Set all parameters to their default values.""" - - self.output_option = None - self.dry_run = False - self.revision_recorder = None - self.revision_excluder = None - self.revision_reader = None - self.svnadmin_executable = config.SVNADMIN_EXECUTABLE - self.sort_executable = config.SORT_EXECUTABLE - self.trunk_only = False - self.prune = True - self.cvs_author_decoder = CVSTextDecoder(['ascii']) - self.cvs_log_decoder = CVSTextDecoder(['ascii']) - self.cvs_filename_decoder = CVSTextDecoder(['ascii']) - self.decode_apple_single = False - self.symbol_info_filename = None - self.username = None - self.svn_property_setters = [] - self.tmpdir = 'cvs2svn-tmp' - self.skip_cleanup = False - self.keep_cvsignore = False - self.cross_project_commits = True - self.cross_branch_commits = True - self.retain_conflicting_attic_files = False - - self.initial_project_commit_message = ( - 'Standard project directories initialized by cvs2svn.' - ) - self.post_commit_message = ( - 'This commit was generated by cvs2svn to compensate for ' - 'changes in r%(revnum)d, which included commits to RCS files ' - 'with non-trunk default branches.' - ) - self.symbol_commit_message = ( - "This commit was manufactured by cvs2svn to create %(symbol_type)s " - "'%(symbol_name)s'." - ) - - - def get_temp_filename(self, basename): - return os.path.join(self.tmpdir, basename) - - def clean(self): - """Dispose of items in our dictionary that are not intended to - live past the end of a pass (identified by exactly one leading - underscore).""" - - for attr in self.__dict__.keys(): - if (attr.startswith('_') and not attr.startswith('__') - and not attr.startswith('_Ctx__')): - delattr(self, attr) - - diff --git a/cvs2svn_lib/cvs_file.py b/cvs2svn_lib/cvs_file.py deleted file mode 100644 index 3a1bb4f..0000000 --- a/cvs2svn_lib/cvs_file.py +++ /dev/null @@ -1,287 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains a class to store information about a CVS file.""" - -import os - -from cvs2svn_lib.common import path_join -from cvs2svn_lib.context import Ctx - - -class CVSPath(object): - """Represent a CVS file or directory. - - Members: - - id -- (int) unique ID for this CVSPath. At any moment, there is - at most one CVSPath instance with a particular ID. (This - means that object identity is the same as object equality, and - objects can be used as map keys even though they don't have a - __hash__() method). - - project -- (Project) the project containing this CVSPath. - - parent_directory -- (CVSDirectory or None) the CVSDirectory - containing this CVSPath. - - basename -- (string) the base name of this CVSPath (no ',v'). The - basename of the root directory of a project is ''. - - ordinal -- (int) the order that this instance should be sorted - relative to other CVSPath instances. This member is set based - on the ordering imposed by slow_compare() by CollectData after - all CVSFiles have been processed. Comparisons of CVSPath - using __cmp__() simply compare the ordinals. - - """ - - __slots__ = [ - 'id', - 'project', - 'parent_directory', - 'basename', - 'ordinal', - ] - - def __init__(self, id, project, parent_directory, basename): - self.id = id - self.project = project - self.parent_directory = parent_directory - self.basename = basename - - def __getstate__(self): - """This method must only be called after ordinal has been set.""" - - return ( - self.id, self.project.id, - self.parent_directory, self.basename, - self.ordinal, - ) - - def __setstate__(self, state): - ( - self.id, project_id, - self.parent_directory, self.basename, - self.ordinal, - ) = state - self.project = Ctx()._projects[project_id] - - def get_ancestry(self): - """Return a list of the CVSPaths leading from the root path to SELF. - - Return the CVSPaths in a list, starting with - self.project.get_root_cvs_directory() and ending with self.""" - - ancestry = [] - p = self - while p is not None: - ancestry.append(p) - p = p.parent_directory - - ancestry.reverse() - return ancestry - - def get_cvs_path(self): - """Return the canonical path within the Project. - - The canonical path: - - - Uses forward slashes - - - Doesn't include ',v' for files - - - This doesn't include the 'Attic' segment of the path unless the - file is to be left in an Attic directory in the SVN repository; - i.e., if a filename exists in and out of Attic and the - --retain-conflicting-attic-files option was specified. - - """ - - return path_join(*[p.basename for p in self.get_ancestry()[1:]]) - - cvs_path = property(get_cvs_path) - - def _get_dir_components(self): - """Return a list containing the components of the path leading to SELF. - - The return value contains the base names of all of the parent - directories (except for the root directory) and SELF.""" - - return [p.basename for p in self.get_ancestry()[1:]] - - def __eq__(a, b): - """Compare two CVSPath instances for equality. - - This method is supplied to avoid using __cmp__() for comparing for - equality.""" - - return a is b - - def slow_compare(a, b): - return ( - # Sort first by project: - cmp(a.project, b.project) - # Then by directory components: - or cmp(a._get_dir_components(), b._get_dir_components()) - ) - - def __cmp__(a, b): - """This method must only be called after ordinal has been set.""" - - return cmp(a.ordinal, b.ordinal) - - -class CVSDirectory(CVSPath): - """Represent a CVS directory. - - Members: - - id -- (int or None) unique id for this file. If None, a new id is - generated. - - project -- (Project) the project containing this file. - - parent_directory -- (CVSDirectory or None) the CVSDirectory - containing this CVSDirectory. - - basename -- (string) the base name of this CVSDirectory (no ',v'). - - """ - - __slots__ = [] - - def __init__(self, id, project, parent_directory, basename): - """Initialize a new CVSDirectory object.""" - - CVSPath.__init__(self, id, project, parent_directory, basename) - - def get_filename(self): - """Return the filesystem path to this CVSPath in the CVS repository.""" - - if self.parent_directory is None: - return self.project.project_cvs_repos_path - else: - return os.path.join( - self.parent_directory.get_filename(), self.basename - ) - - filename = property(get_filename) - - def __getstate__(self): - return CVSPath.__getstate__(self) - - def __setstate__(self, state): - CVSPath.__setstate__(self, state) - - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return self.cvs_path + '/' - - def __repr__(self): - return 'CVSDirectory<%x>(%r)' % (self.id, str(self),) - - -class CVSFile(CVSPath): - """Represent a CVS file. - - Members: - - id -- (int) unique id for this file. - - project -- (Project) the project containing this file. - - parent_directory -- (CVSDirectory) the CVSDirectory containing - this CVSFile. - - basename -- (string) the base name of this CVSFile (no ',v'). - - _in_attic -- (bool) True if RCS file is in an Attic subdirectory - that is not considered the parent directory. (If a file is - in-and-out-of-attic and one copy is to be left in Attic after - the conversion, then the Attic directory is that file's - PARENT_DIRECTORY and _IN_ATTIC is False.) - - executable -- (bool) True iff RCS file has executable bit set. - - file_size -- (long) size of the RCS file in bytes. - - mode -- (string or None) 'kkv', 'kb', etc. - - PARENT_DIRECTORY might contain an 'Attic' component if it should be - retained in the SVN repository; i.e., if the same filename exists out - of Attic and the --retain-conflicting-attic-files option was specified. - - """ - - __slots__ = [ - '_in_attic', - 'executable', - 'file_size', - 'mode', - ] - - def __init__( - self, id, project, parent_directory, basename, in_attic, - executable, file_size, mode - ): - """Initialize a new CVSFile object.""" - - CVSPath.__init__(self, id, project, parent_directory, basename) - self._in_attic = in_attic - self.executable = executable - self.file_size = file_size - self.mode = mode - - assert self.parent_directory is not None - - def get_filename(self): - """Return the filesystem path to this CVSPath in the CVS repository.""" - - if self._in_attic: - return os.path.join( - self.parent_directory.filename, 'Attic', self.basename + ',v' - ) - else: - return os.path.join( - self.parent_directory.filename, self.basename + ',v' - ) - - filename = property(get_filename) - - def __getstate__(self): - return ( - CVSPath.__getstate__(self), - self._in_attic, self.executable, self.file_size, self.mode, - ) - - def __setstate__(self, state): - ( - cvs_path_state, - self._in_attic, self.executable, self.file_size, self.mode, - ) = state - CVSPath.__setstate__(self, cvs_path_state) - - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return self.cvs_path - - def __repr__(self): - return 'CVSFile<%x>(%r)' % (self.id, str(self),) - - diff --git a/cvs2svn_lib/cvs_file_database.py b/cvs2svn_lib/cvs_file_database.py deleted file mode 100644 index 61eebf3..0000000 --- a/cvs2svn_lib/cvs_file_database.py +++ /dev/null @@ -1,75 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains database facilities used by cvs2svn.""" - - -import cPickle - -from cvs2svn_lib import config -from cvs2svn_lib.common import DB_OPEN_READ -from cvs2svn_lib.common import DB_OPEN_NEW -from cvs2svn_lib.artifact_manager import artifact_manager - - -class CVSFileDatabase: - """A database to store CVSFile objects and retrieve them by their id.""" - - def __init__(self, mode): - """Initialize an instance, opening database in MODE (where MODE is - either DB_OPEN_NEW or DB_OPEN_READ).""" - - self.mode = mode - - # A map { id : CVSFile } - self._cvs_files = {} - - if self.mode == DB_OPEN_NEW: - pass - elif self.mode == DB_OPEN_READ: - f = open(artifact_manager.get_temp_file(config.CVS_FILES_DB), 'rb') - cvs_files = cPickle.load(f) - for cvs_file in cvs_files: - self._cvs_files[cvs_file.id] = cvs_file - else: - raise RuntimeError('Invalid mode %r' % self.mode) - - def log_file(self, cvs_file): - """Add CVS_FILE, a CVSFile instance, to the database.""" - - if self.mode == DB_OPEN_READ: - raise RuntimeError('Cannot write items in mode %r' % self.mode) - - self._cvs_files[cvs_file.id] = cvs_file - - def itervalues(self): - for value in self._cvs_files.itervalues(): - yield value - - def get_file(self, id): - """Return the CVSFile with the specified ID.""" - - return self._cvs_files[id] - - def close(self): - if self.mode == DB_OPEN_NEW: - f = open(artifact_manager.get_temp_file(config.CVS_FILES_DB), 'wb') - cPickle.dump(self._cvs_files.values(), f, -1) - f.close() - - self._cvs_files = None - - diff --git a/cvs2svn_lib/cvs_file_items.py b/cvs2svn_lib/cvs_file_items.py deleted file mode 100644 index f0dc782..0000000 --- a/cvs2svn_lib/cvs_file_items.py +++ /dev/null @@ -1,1075 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2006-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains a class to manage the CVSItems related to one file.""" - - -import re - -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.log import Log -from cvs2svn_lib.symbol import Trunk -from cvs2svn_lib.symbol import Branch -from cvs2svn_lib.symbol import Tag -from cvs2svn_lib.symbol import ExcludedSymbol -from cvs2svn_lib.cvs_item import CVSRevision -from cvs2svn_lib.cvs_item import CVSRevisionModification -from cvs2svn_lib.cvs_item import CVSRevisionAbsent -from cvs2svn_lib.cvs_item import CVSRevisionNoop -from cvs2svn_lib.cvs_item import CVSSymbol -from cvs2svn_lib.cvs_item import CVSBranch -from cvs2svn_lib.cvs_item import CVSTag -from cvs2svn_lib.cvs_item import cvs_revision_type_map -from cvs2svn_lib.cvs_item import cvs_branch_type_map -from cvs2svn_lib.cvs_item import cvs_tag_type_map - - -class VendorBranchError(Exception): - """There is an error in the structure of the file revision tree.""" - - pass - - -class LODItems(object): - def __init__(self, lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags): - # The LineOfDevelopment described by this instance. - self.lod = lod - - # The CVSBranch starting this LOD, if any; otherwise, None. - self.cvs_branch = cvs_branch - - # The list of CVSRevisions on this LOD, if any. The CVSRevisions - # are listed in dependency order. - self.cvs_revisions = cvs_revisions - - # A list of CVSBranches that sprout from this LOD (either from - # cvs_branch or from one of the CVSRevisions). - self.cvs_branches = cvs_branches - - # A list of CVSTags that sprout from this LOD (either from - # cvs_branch or from one of the CVSRevisions). - self.cvs_tags = cvs_tags - - def is_trivial_import(self): - """Return True iff this LOD is a trivial import branch in this file. - - A trivial import branch is a branch that was used for a single - import and nothing else. Such a branch is eligible for being - grafted onto trunk, even if it has branch blockers.""" - - return ( - len(self.cvs_revisions) == 1 - and self.cvs_revisions[0].ntdbr - ) - - def is_pure_ntdb(self): - """Return True iff this LOD is a pure NTDB in this file. - - A pure non-trunk default branch is defined to be a branch that - contains only NTDB revisions (and at least one of them). Such a - branch is eligible for being grafted onto trunk, even if it has - branch blockers.""" - - return ( - self.cvs_revisions - and self.cvs_revisions[-1].ntdbr - ) - - def iter_blockers(self): - if self.is_pure_ntdb(): - # Such a branch has no blockers, because the blockers can be - # grafted to trunk. - pass - else: - # Other branches are only blocked by symbols that sprout from - # non-NTDB revisions: - non_ntdbr_revision_ids = set() - for cvs_revision in self.cvs_revisions: - if not cvs_revision.ntdbr: - non_ntdbr_revision_ids.add(cvs_revision.id) - - for cvs_tag in self.cvs_tags: - if cvs_tag.source_id in non_ntdbr_revision_ids: - yield cvs_tag - - for cvs_branch in self.cvs_branches: - if cvs_branch.source_id in non_ntdbr_revision_ids: - yield cvs_branch - - -class CVSFileItems(object): - def __init__(self, cvs_file, trunk, cvs_items): - # The file whose data this instance holds. - self.cvs_file = cvs_file - - # The symbol that represents "Trunk" in this file. - self.trunk = trunk - - # A map from CVSItem.id to CVSItem: - self._cvs_items = {} - - # The cvs_item_id of each root in the CVSItem forest. (A root is - # defined to be any CVSRevision with no prev_id.) - self.root_ids = set() - - for cvs_item in cvs_items: - self.add(cvs_item) - if isinstance(cvs_item, CVSRevision) and cvs_item.prev_id is None: - self.root_ids.add(cvs_item.id) - - def __getstate__(self): - return (self.cvs_file.id, self.values(),) - - def __setstate__(self, state): - (cvs_file_id, cvs_items,) = state - cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id) - CVSFileItems.__init__( - self, cvs_file, cvs_file.project.get_trunk(), cvs_items, - ) - - def add(self, cvs_item): - self._cvs_items[cvs_item.id] = cvs_item - - def __getitem__(self, id): - """Return the CVSItem with the specified ID.""" - - return self._cvs_items[id] - - def get(self, id, default=None): - return self._cvs_items.get(id, default) - - def __delitem__(self, id): - assert id not in self.root_ids - del self._cvs_items[id] - - def values(self): - return self._cvs_items.values() - - def check_link_consistency(self): - """Check that the CVSItems are linked correctly with each other.""" - - for cvs_item in self.values(): - try: - cvs_item.check_links(self) - except AssertionError: - Log().error( - 'Link consistency error in %s\n' - 'This is probably a bug internal to cvs2svn. Please file a bug\n' - 'report including the following stack trace (see FAQ for more ' - 'info).' - % (cvs_item,)) - raise - - def _get_lod(self, lod, cvs_branch, start_id): - """Return the indicated LODItems. - - LOD is the corresponding LineOfDevelopment. CVS_BRANCH is the - CVSBranch instance that starts the LOD if any; otherwise it is - None. START_ID is the id of the first CVSRevision on this LOD, or - None if there are none.""" - - cvs_revisions = [] - cvs_branches = [] - cvs_tags = [] - - def process_subitems(cvs_item): - """Process the branches and tags that are rooted in CVS_ITEM. - - CVS_ITEM can be a CVSRevision or a CVSBranch.""" - - for branch_id in cvs_item.branch_ids[:]: - cvs_branches.append(self[branch_id]) - - for tag_id in cvs_item.tag_ids: - cvs_tags.append(self[tag_id]) - - if cvs_branch is not None: - # Include the symbols sprouting directly from the CVSBranch: - process_subitems(cvs_branch) - - id = start_id - while id is not None: - cvs_rev = self[id] - cvs_revisions.append(cvs_rev) - process_subitems(cvs_rev) - id = cvs_rev.next_id - - return LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags) - - def get_lod_items(self, cvs_branch): - """Return an LODItems describing the branch that starts at CVS_BRANCH. - - CVS_BRANCH must be an instance of CVSBranch contained in this - CVSFileItems.""" - - return self._get_lod(cvs_branch.symbol, cvs_branch, cvs_branch.next_id) - - def iter_root_lods(self): - """Iterate over the LODItems for all root LODs (non-recursively).""" - - for id in list(self.root_ids): - cvs_item = self[id] - if isinstance(cvs_item, CVSRevision): - # This LOD doesn't have a CVSBranch associated with it. - # Either it is Trunk, or it is a branch whose CVSBranch has - # been deleted. - yield self._get_lod(cvs_item.lod, None, id) - elif isinstance(cvs_item, CVSBranch): - # This is a Branch that has been severed from the rest of the - # tree. - yield self._get_lod(cvs_item.symbol, cvs_item, cvs_item.next_id) - else: - raise InternalError('Unexpected root item: %s' % (cvs_item,)) - - def _iter_tree(self, lod, cvs_branch, start_id): - """Iterate over the tree that starts at the specified line of development. - - LOD is the LineOfDevelopment where the iteration should start. - CVS_BRANCH is the CVSBranch instance that starts the LOD if any; - otherwise it is None. ID is the id of the first CVSRevision on - this LOD, or None if there are none. - - There are two cases handled by this routine: trunk (where LOD is a - Trunk instance, CVS_BRANCH is None, and ID is the id of the 1.1 - revision) and a branch (where LOD is a Branch instance, CVS_BRANCH - is a CVSBranch instance, and ID is either the id of the first - CVSRevision on the branch or None if there are no CVSRevisions on - the branch). Note that CVS_BRANCH and ID cannot simultaneously be - None. - - Yield an LODItems instance for each line of development.""" - - cvs_revisions = [] - cvs_branches = [] - cvs_tags = [] - - def process_subitems(cvs_item): - """Process the branches and tags that are rooted in CVS_ITEM. - - CVS_ITEM can be a CVSRevision or a CVSBranch.""" - - for branch_id in cvs_item.branch_ids[:]: - # Recurse into the branch: - branch = self[branch_id] - for lod_items in self._iter_tree( - branch.symbol, branch, branch.next_id - ): - yield lod_items - # The caller might have deleted the branch that we just - # yielded. If it is no longer present, then do not add it to - # the list of cvs_branches. - try: - cvs_branches.append(self[branch_id]) - except KeyError: - pass - - for tag_id in cvs_item.tag_ids: - cvs_tags.append(self[tag_id]) - - if cvs_branch is not None: - # Include the symbols sprouting directly from the CVSBranch: - for lod_items in process_subitems(cvs_branch): - yield lod_items - - id = start_id - while id is not None: - cvs_rev = self[id] - cvs_revisions.append(cvs_rev) - - for lod_items in process_subitems(cvs_rev): - yield lod_items - - id = cvs_rev.next_id - - yield LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags) - - def iter_lods(self): - """Iterate over LinesOfDevelopment in this file, in depth-first order. - - For each LOD, yield an LODItems instance. The traversal starts at - each root node but returns the LODs in depth-first order. - - It is allowed to modify the CVSFileItems instance while the - traversal is occurring, but only in ways that don't affect the - tree structure above (i.e., towards the trunk from) the current - LOD.""" - - # Make a list out of root_ids so that callers can change it: - for id in list(self.root_ids): - cvs_item = self[id] - if isinstance(cvs_item, CVSRevision): - # This LOD doesn't have a CVSBranch associated with it. - # Either it is Trunk, or it is a branch whose CVSBranch has - # been deleted. - lod = cvs_item.lod - cvs_branch = None - elif isinstance(cvs_item, CVSBranch): - # This is a Branch that has been severed from the rest of the - # tree. - lod = cvs_item.symbol - id = cvs_item.next_id - cvs_branch = cvs_item - else: - raise InternalError('Unexpected root item: %s' % (cvs_item,)) - - for lod_items in self._iter_tree(lod, cvs_branch, id): - yield lod_items - - def iter_deltatext_ancestors(self, cvs_rev): - """Generate the delta-dependency ancestors of CVS_REV. - - Generate then ancestors of CVS_REV in deltatext order; i.e., back - along branches towards trunk, then outwards along trunk towards - HEAD.""" - - while True: - # Determine the next candidate source revision: - if isinstance(cvs_rev.lod, Trunk): - if cvs_rev.next_id is None: - # HEAD has no ancestors, so we are done: - return - else: - cvs_rev = self[cvs_rev.next_id] - else: - cvs_rev = self[cvs_rev.prev_id] - - yield cvs_rev - - def _sever_branch(self, lod_items): - """Sever the branch from its source and discard the CVSBranch. - - LOD_ITEMS describes a branch that should be severed from its - source, deleting the CVSBranch and creating a new root. Also set - LOD_ITEMS.cvs_branch to none. - - This method can only be used before symbols have been grafted onto - CVSBranches. It does not adjust NTDBR, NTDBR_PREV_ID or - NTDBR_NEXT_ID even if LOD_ITEMS describes a NTDB.""" - - cvs_branch = lod_items.cvs_branch - assert cvs_branch is not None - assert not cvs_branch.tag_ids - assert not cvs_branch.branch_ids - source_rev = self[cvs_branch.source_id] - - # We only cover the following case, even though after - # FilterSymbolsPass cvs_branch.source_id might refer to another - # CVSBranch. - assert isinstance(source_rev, CVSRevision) - - # Delete the CVSBranch itself: - lod_items.cvs_branch = None - del self[cvs_branch.id] - - # Delete the reference from the source revision to the CVSBranch: - source_rev.branch_ids.remove(cvs_branch.id) - - # Delete the reference from the first revision on the branch to - # the CVSBranch: - if lod_items.cvs_revisions: - first_rev = lod_items.cvs_revisions[0] - - # Delete the reference from first_rev to the CVSBranch: - first_rev.first_on_branch_id = None - - # Delete the reference from the source revision to the first - # revision on the branch: - source_rev.branch_commit_ids.remove(first_rev.id) - - # ...and vice versa: - first_rev.prev_id = None - - # Change the type of first_rev (e.g., from Change to Add): - first_rev.__class__ = cvs_revision_type_map[ - (isinstance(first_rev, CVSRevisionModification), False,) - ] - - # Now first_rev is a new root: - self.root_ids.add(first_rev.id) - - def adjust_ntdbrs(self, ntdbr_cvs_revs): - """Adjust the specified non-trunk default branch revisions. - - NTDBR_CVS_REVS is a list of CVSRevision instances in this file - that have been determined to be non-trunk default branch - revisions. - - The first revision on the default branch is handled strangely by - CVS. If a file is imported (as opposed to being added), CVS - creates a 1.1 revision, then creates a vendor branch 1.1.1 based - on 1.1, then creates a 1.1.1.1 revision that is identical to the - 1.1 revision (i.e., its deltatext is empty). The log message that - the user typed when importing is stored with the 1.1.1.1 revision. - The 1.1 revision always contains a standard, generated log - message, 'Initial revision\n'. - - When we detect a straightforward import like this, we want to - handle it by deleting the 1.1 revision (which doesn't contain any - useful information) and making 1.1.1.1 into an independent root in - the file's dependency tree. In SVN, 1.1.1.1 will be added - directly to the vendor branch with its initial content. Then in a - special 'post-commit', the 1.1.1.1 revision is copied back to - trunk. - - If the user imports again to the same vendor branch, then CVS - creates revisions 1.1.1.2, 1.1.1.3, etc. on the vendor branch, - *without* counterparts in trunk (even though these revisions - effectively play the role of trunk revisions). So after we add - such revisions to the vendor branch, we also copy them back to - trunk in post-commits. - - Set the ntdbr members of the revisions listed in NTDBR_CVS_REVS to - True. Also, if there is a 1.2 revision, then set that revision to - depend on the last non-trunk default branch revision and possibly - adjust its type accordingly.""" - - for cvs_rev in ntdbr_cvs_revs: - cvs_rev.ntdbr = True - - # Look for a 1.2 revision: - rev_1_1 = self[ntdbr_cvs_revs[0].prev_id] - - rev_1_2 = self.get(rev_1_1.next_id) - if rev_1_2 is not None: - # Revision 1.2 logically follows the imported revisions, not - # 1.1. Accordingly, connect it to the last NTDBR and possibly - # change its type. - last_ntdbr = ntdbr_cvs_revs[-1] - rev_1_2.ntdbr_prev_id = last_ntdbr.id - last_ntdbr.ntdbr_next_id = rev_1_2.id - rev_1_2.__class__ = cvs_revision_type_map[( - isinstance(rev_1_2, CVSRevisionModification), - isinstance(last_ntdbr, CVSRevisionModification), - )] - - def process_live_ntdb(self, vendor_lod_items): - """VENDOR_LOD_ITEMS is a live default branch; process it. - - In this case, all revisions on the default branch are NTDBRs and - it is an error if there is also a '1.2' revision. - - Return True iff this transformation really does something. Raise - a VendorBranchError if there is a '1.2' revision.""" - - rev_1_1 = self[vendor_lod_items.cvs_branch.source_id] - rev_1_2_id = rev_1_1.next_id - if rev_1_2_id is not None: - raise VendorBranchError( - 'File \'%s\' has default branch=%s but also a revision %s' - % (self.cvs_file.filename, - vendor_lod_items.cvs_branch.branch_number, self[rev_1_2_id].rev,) - ) - - ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions) - - if ntdbr_cvs_revs: - self.adjust_ntdbrs(ntdbr_cvs_revs) - return True - else: - return False - - def process_historical_ntdb(self, vendor_lod_items): - """There appears to have been a non-trunk default branch in the past. - - There is currently no default branch, but the branch described by - file appears to have been imported. So our educated guess is that - all revisions on the '1.1.1' branch (described by - VENDOR_LOD_ITEMS) with timestamps prior to the timestamp of '1.2' - were non-trunk default branch revisions. - - Return True iff this transformation really does something. - - This really only handles standard '1.1.1.*'-style vendor - revisions. One could conceivably have a file whose default branch - is 1.1.3 or whatever, or was that at some point in time, with - vendor revisions 1.1.3.1, 1.1.3.2, etc. But with the default - branch gone now, we'd have no basis for assuming that the - non-standard vendor branch had ever been the default branch - anyway. - - Note that we rely on comparisons between the timestamps of the - revisions on the vendor branch and that of revision 1.2, even - though the timestamps might be incorrect due to clock skew. We - could do a slightly better job if we used the changeset - timestamps, as it is possible that the dependencies that went into - determining those timestamps are more accurate. But that would - require an extra pass or two.""" - - rev_1_1 = self[vendor_lod_items.cvs_branch.source_id] - rev_1_2_id = rev_1_1.next_id - - if rev_1_2_id is None: - rev_1_2_timestamp = None - else: - rev_1_2_timestamp = self[rev_1_2_id].timestamp - - ntdbr_cvs_revs = [] - for cvs_rev in vendor_lod_items.cvs_revisions: - if rev_1_2_timestamp is not None \ - and cvs_rev.timestamp >= rev_1_2_timestamp: - # That's the end of the once-default branch. - break - ntdbr_cvs_revs.append(cvs_rev) - - if ntdbr_cvs_revs: - self.adjust_ntdbrs(ntdbr_cvs_revs) - return True - else: - return False - - def imported_remove_1_1(self, vendor_lod_items): - """This file was imported. Remove the 1.1 revision if possible. - - VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch. - See adjust_ntdbrs() for more information.""" - - assert vendor_lod_items.cvs_revisions - cvs_rev = vendor_lod_items.cvs_revisions[0] - - if isinstance(cvs_rev, CVSRevisionModification) \ - and not cvs_rev.deltatext_exists: - cvs_branch = vendor_lod_items.cvs_branch - rev_1_1 = self[cvs_branch.source_id] - assert isinstance(rev_1_1, CVSRevision) - Log().debug('Removing unnecessary revision %s' % (rev_1_1,)) - - # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk: - self._sever_branch(vendor_lod_items) - - # Delete rev_1_1: - self.root_ids.remove(rev_1_1.id) - del self[rev_1_1.id] - rev_1_2_id = rev_1_1.next_id - if rev_1_2_id is not None: - rev_1_2 = self[rev_1_2_id] - rev_1_2.prev_id = None - self.root_ids.add(rev_1_2.id) - - # Move any tags and branches from rev_1_1 to cvs_rev: - cvs_rev.tag_ids.extend(rev_1_1.tag_ids) - for id in rev_1_1.tag_ids: - cvs_tag = self[id] - cvs_tag.source_lod = cvs_rev.lod - cvs_tag.source_id = cvs_rev.id - cvs_rev.branch_ids[0:0] = rev_1_1.branch_ids - for id in rev_1_1.branch_ids: - cvs_branch = self[id] - cvs_branch.source_lod = cvs_rev.lod - cvs_branch.source_id = cvs_rev.id - cvs_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids - for id in rev_1_1.branch_commit_ids: - cvs_rev2 = self[id] - cvs_rev2.prev_id = cvs_rev.id - - def _delete_unneeded(self, cvs_item, metadata_db): - if isinstance(cvs_item, CVSRevisionNoop) \ - and cvs_item.rev == '1.1' \ - and isinstance(cvs_item.lod, Trunk) \ - and len(cvs_item.branch_ids) >= 1 \ - and self[cvs_item.branch_ids[0]].next_id is not None \ - and not cvs_item.closed_symbols \ - and not cvs_item.ntdbr: - # FIXME: This message will not match if the RCS file was renamed - # manually after it was created. - log_msg = metadata_db[cvs_item.metadata_id].log_msg - cvs_generated_msg = 'file %s was initially added on branch %s.\n' % ( - self.cvs_file.basename, - self[cvs_item.branch_ids[0]].symbol.name,) - return log_msg == cvs_generated_msg - else: - return False - - def remove_unneeded_deletes(self, metadata_db): - """Remove unneeded deletes for this file. - - If a file is added on a branch, then a trunk revision is added at - the same time in the 'Dead' state. This revision doesn't do - anything useful, so delete it.""" - - for id in self.root_ids: - cvs_item = self[id] - if self._delete_unneeded(cvs_item, metadata_db): - Log().debug('Removing unnecessary delete %s' % (cvs_item,)) - - # Delete cvs_item: - self.root_ids.remove(cvs_item.id) - del self[id] - if cvs_item.next_id is not None: - cvs_rev_next = self[cvs_item.next_id] - cvs_rev_next.prev_id = None - self.root_ids.add(cvs_rev_next.id) - - # Delete all CVSBranches rooted at this revision. If there is - # a CVSRevision on the branch, it should already be an add so - # it doesn't have to be changed. - for cvs_branch_id in cvs_item.branch_ids: - cvs_branch = self[cvs_branch_id] - del self[cvs_branch.id] - - if cvs_branch.next_id is not None: - cvs_branch_next = self[cvs_branch.next_id] - cvs_branch_next.first_on_branch_id = None - cvs_branch_next.prev_id = None - self.root_ids.add(cvs_branch_next.id) - - # Tagging a dead revision doesn't do anything, so remove any - # tags that were set on 1.1: - for cvs_tag_id in cvs_item.tag_ids: - del self[cvs_tag_id] - - # This can only happen once per file, and we might have just - # changed self.root_ids, so break out of the loop: - break - - def _initial_branch_delete_unneeded(self, lod_items, metadata_db): - """Return True iff the initial revision in LOD_ITEMS can be deleted.""" - - if lod_items.cvs_branch is not None \ - and lod_items.cvs_branch.source_id is not None \ - and len(lod_items.cvs_revisions) >= 2: - cvs_revision = lod_items.cvs_revisions[0] - cvs_rev_source = self[lod_items.cvs_branch.source_id] - if isinstance(cvs_revision, CVSRevisionAbsent) \ - and not cvs_revision.tag_ids \ - and not cvs_revision.branch_ids \ - and abs(cvs_revision.timestamp - cvs_rev_source.timestamp) <= 2: - # FIXME: This message will not match if the RCS file was renamed - # manually after it was created. - log_msg = metadata_db[cvs_revision.metadata_id].log_msg - return bool(re.match( - r'file %s was added on branch .* on ' - r'\d{4}\-\d{2}\-\d{2} \d{2}\:\d{2}\:\d{2}( [\+\-]\d{4})?' - '\n' % (re.escape(self.cvs_file.basename),), - log_msg, - )) - return False - - def remove_initial_branch_deletes(self, metadata_db): - """If the first revision on a branch is an unnecessary delete, remove it. - - If a file is added on a branch (whether or not it already existed - on trunk), then new versions of CVS add a first branch revision in - the 'dead' state (to indicate that the file did not exist on the - branch when the branch was created) followed by the second branch - revision, which is an add. When we encounter this situation, we - sever the branch from trunk and delete the first branch - revision.""" - - for lod_items in self.iter_lods(): - if self._initial_branch_delete_unneeded(lod_items, metadata_db): - cvs_revision = lod_items.cvs_revisions[0] - Log().debug( - 'Removing unnecessary initial branch delete %s' % (cvs_revision,) - ) - cvs_branch = lod_items.cvs_branch - cvs_rev_source = self[cvs_branch.source_id] - cvs_rev_next = lod_items.cvs_revisions[1] - - # Delete cvs_revision: - del self[cvs_revision.id] - cvs_rev_next.prev_id = None - self.root_ids.add(cvs_rev_next.id) - cvs_rev_source.branch_commit_ids.remove(cvs_revision.id) - - # Delete the CVSBranch on which it is located: - del self[cvs_branch.id] - cvs_rev_source.branch_ids.remove(cvs_branch.id) - - def _exclude_tag(self, cvs_tag): - """Exclude the specified CVS_TAG.""" - - del self[cvs_tag.id] - - # A CVSTag is the successor of the CVSRevision that it - # sprouts from. Delete this tag from that revision's - # tag_ids: - self[cvs_tag.source_id].tag_ids.remove(cvs_tag.id) - - def _exclude_branch(self, lod_items): - """Exclude the branch described by LOD_ITEMS, including its revisions. - - (Do not update the LOD_ITEMS instance itself.) - - If the LOD starts with non-trunk default branch revisions, leave - the branch and the NTDB revisions in place, but delete any - subsequent revisions that are not NTDB revisions. In this case, - return True; otherwise return False""" - - if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr: - for cvs_rev in lod_items.cvs_revisions: - if not cvs_rev.ntdbr: - # We've found the first non-NTDBR, and it's stored in cvs_rev: - break - else: - # There was no revision following the NTDBRs: - cvs_rev = None - - if cvs_rev: - last_ntdbr = self[cvs_rev.prev_id] - last_ntdbr.next_id = None - while True: - del self[cvs_rev.id] - if cvs_rev.next_id is None: - break - cvs_rev = self[cvs_rev.next_id] - - return True - - else: - if lod_items.cvs_branch is not None: - # Delete the CVSBranch itself: - cvs_branch = lod_items.cvs_branch - - del self[cvs_branch.id] - - # A CVSBranch is the successor of the CVSRevision that it - # sprouts from. Delete this branch from that revision's - # branch_ids: - self[cvs_branch.source_id].branch_ids.remove(cvs_branch.id) - - if lod_items.cvs_revisions: - # The first CVSRevision on the branch has to be either detached - # from the revision from which the branch sprang, or removed - # from self.root_ids: - cvs_rev = lod_items.cvs_revisions[0] - if cvs_rev.prev_id is None: - self.root_ids.remove(cvs_rev.id) - else: - self[cvs_rev.prev_id].branch_commit_ids.remove(cvs_rev.id) - - for cvs_rev in lod_items.cvs_revisions: - del self[cvs_rev.id] - - return False - - def graft_ntdbr_to_trunk(self): - """Graft the non-trunk default branch revisions to trunk. - - They should already be alone on a branch that may or may not have - a CVSBranch connecting it to trunk.""" - - for lod_items in self.iter_lods(): - if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr: - assert lod_items.is_pure_ntdb() - - first_rev = lod_items.cvs_revisions[0] - last_rev = lod_items.cvs_revisions[-1] - rev_1_1 = self.get(first_rev.prev_id) - rev_1_2 = self.get(last_rev.ntdbr_next_id) - - if lod_items.cvs_branch is not None: - self._sever_branch(lod_items) - - if rev_1_1 is not None: - rev_1_1.next_id = first_rev.id - first_rev.prev_id = rev_1_1.id - - self.root_ids.remove(first_rev.id) - - first_rev.__class__ = cvs_revision_type_map[( - isinstance(first_rev, CVSRevisionModification), - isinstance(rev_1_1, CVSRevisionModification), - )] - - if rev_1_2 is not None: - rev_1_2.ntdbr_prev_id = None - last_rev.ntdbr_next_id = None - - if rev_1_2.prev_id is None: - self.root_ids.remove(rev_1_2.id) - - rev_1_2.prev_id = last_rev.id - last_rev.next_id = rev_1_2.id - - # The effective_pred_id of rev_1_2 was not changed, so we - # don't have to change rev_1_2's type. - - for cvs_rev in lod_items.cvs_revisions: - cvs_rev.ntdbr = False - cvs_rev.lod = self.trunk - - for cvs_branch in lod_items.cvs_branches: - cvs_branch.source_lod = self.trunk - - for cvs_tag in lod_items.cvs_tags: - cvs_tag.source_lod = self.trunk - - return - - def exclude_non_trunk(self): - """Delete all tags and branches.""" - - ntdbr_excluded = False - for lod_items in self.iter_lods(): - for cvs_tag in lod_items.cvs_tags[:]: - self._exclude_tag(cvs_tag) - lod_items.cvs_tags.remove(cvs_tag) - - if not isinstance(lod_items.lod, Trunk): - assert not lod_items.cvs_branches - - ntdbr_excluded |= self._exclude_branch(lod_items) - - if ntdbr_excluded: - self.graft_ntdbr_to_trunk() - - def filter_excluded_symbols(self, revision_excluder): - """Delete any excluded symbols and references to them. - - Call the revision_excluder's callback methods to let it know what - is being excluded.""" - - ntdbr_excluded = False - for lod_items in self.iter_lods(): - # Delete any excluded tags: - for cvs_tag in lod_items.cvs_tags[:]: - if isinstance(cvs_tag.symbol, ExcludedSymbol): - self._exclude_tag(cvs_tag) - - lod_items.cvs_tags.remove(cvs_tag) - - # Delete the whole branch if it is to be excluded: - if isinstance(lod_items.lod, ExcludedSymbol): - # A symbol can only be excluded if no other symbols spring - # from it. This was already checked in CollateSymbolsPass, so - # these conditions should already be satisfied. - assert not list(lod_items.iter_blockers()) - - ntdbr_excluded |= self._exclude_branch(lod_items) - - if ntdbr_excluded: - self.graft_ntdbr_to_trunk() - - revision_excluder.process_file(self) - - def _mutate_branch_to_tag(self, cvs_branch): - """Mutate the branch CVS_BRANCH into a tag.""" - - if cvs_branch.next_id is not None: - # This shouldn't happen because it was checked in - # CollateSymbolsPass: - raise FatalError('Attempt to exclude a branch with commits.') - cvs_tag = CVSTag( - cvs_branch.id, cvs_branch.cvs_file, cvs_branch.symbol, - cvs_branch.source_lod, cvs_branch.source_id, - cvs_branch.revision_recorder_token, - ) - self.add(cvs_tag) - cvs_revision = self[cvs_tag.source_id] - cvs_revision.branch_ids.remove(cvs_tag.id) - cvs_revision.tag_ids.append(cvs_tag.id) - - def _mutate_tag_to_branch(self, cvs_tag): - """Mutate the tag into a branch.""" - - cvs_branch = CVSBranch( - cvs_tag.id, cvs_tag.cvs_file, cvs_tag.symbol, - None, cvs_tag.source_lod, cvs_tag.source_id, None, - cvs_tag.revision_recorder_token, - ) - self.add(cvs_branch) - cvs_revision = self[cvs_branch.source_id] - cvs_revision.tag_ids.remove(cvs_branch.id) - cvs_revision.branch_ids.append(cvs_branch.id) - - def _mutate_symbol(self, cvs_symbol): - """Mutate CVS_SYMBOL if necessary.""" - - symbol = cvs_symbol.symbol - if isinstance(cvs_symbol, CVSBranch) and isinstance(symbol, Tag): - self._mutate_branch_to_tag(cvs_symbol) - elif isinstance(cvs_symbol, CVSTag) and isinstance(symbol, Branch): - self._mutate_tag_to_branch(cvs_symbol) - - def mutate_symbols(self): - """Force symbols to be tags/branches based on self.symbol_db.""" - - for cvs_item in self.values(): - if isinstance(cvs_item, CVSRevision): - # This CVSRevision may be affected by the mutation of any - # CVSSymbols that it references, but there is nothing to do - # here directly. - pass - elif isinstance(cvs_item, CVSSymbol): - self._mutate_symbol(cvs_item) - else: - raise RuntimeError('Unknown cvs item type') - - def _adjust_tag_parent(self, cvs_tag): - """Adjust the parent of CVS_TAG if possible and preferred. - - CVS_TAG is an instance of CVSTag. This method must be called in - leaf-to-trunk order.""" - - # The Symbol that cvs_tag would like to have as a parent: - preferred_parent = Ctx()._symbol_db.get_symbol( - cvs_tag.symbol.preferred_parent_id) - - if cvs_tag.source_lod == preferred_parent: - # The preferred parent is already the parent. - return - - # The CVSRevision that is its direct parent: - source = self[cvs_tag.source_id] - assert isinstance(source, CVSRevision) - - if isinstance(preferred_parent, Trunk): - # It is not possible to graft *onto* Trunk: - return - - # Try to find the preferred parent among the possible parents: - for branch_id in source.branch_ids: - if self[branch_id].symbol == preferred_parent: - # We found it! - break - else: - # The preferred parent is not a possible parent in this file. - return - - parent = self[branch_id] - assert isinstance(parent, CVSBranch) - - Log().debug('Grafting %s from %s (on %s) onto %s' % ( - cvs_tag, source, source.lod, parent,)) - # Switch parent: - source.tag_ids.remove(cvs_tag.id) - parent.tag_ids.append(cvs_tag.id) - cvs_tag.source_lod = parent.symbol - cvs_tag.source_id = parent.id - - def _adjust_branch_parents(self, cvs_branch): - """Adjust the parent of CVS_BRANCH if possible and preferred. - - CVS_BRANCH is an instance of CVSBranch. This method must be - called in leaf-to-trunk order.""" - - # The Symbol that cvs_branch would like to have as a parent: - preferred_parent = Ctx()._symbol_db.get_symbol( - cvs_branch.symbol.preferred_parent_id) - - if cvs_branch.source_lod == preferred_parent: - # The preferred parent is already the parent. - return - - # The CVSRevision that is its direct parent: - source = self[cvs_branch.source_id] - # This is always a CVSRevision because we haven't adjusted it yet: - assert isinstance(source, CVSRevision) - - if isinstance(preferred_parent, Trunk): - # It is not possible to graft *onto* Trunk: - return - - # Try to find the preferred parent among the possible parents: - for branch_id in source.branch_ids: - possible_parent = self[branch_id] - if possible_parent.symbol == preferred_parent: - # We found it! - break - elif possible_parent.symbol == cvs_branch.symbol: - # Only branches that precede the branch to be adjusted are - # considered possible parents. Leave parentage unchanged: - return - else: - # This point should never be reached. - raise InternalError( - 'Possible parent search did not terminate as expected') - - parent = possible_parent - assert isinstance(parent, CVSBranch) - - Log().debug('Grafting %s from %s (on %s) onto %s' % ( - cvs_branch, source, source.lod, parent,)) - # Switch parent: - source.branch_ids.remove(cvs_branch.id) - parent.branch_ids.append(cvs_branch.id) - cvs_branch.source_lod = parent.symbol - cvs_branch.source_id = parent.id - - def adjust_parents(self): - """Adjust the parents of symbols to their preferred parents. - - If a CVSSymbol has a preferred parent that is different than its - current parent, and if the preferred parent is an allowed parent - of the CVSSymbol in this file, then graft the CVSSymbol onto its - preferred parent.""" - - for lod_items in self.iter_lods(): - for cvs_tag in lod_items.cvs_tags: - self._adjust_tag_parent(cvs_tag) - - for cvs_branch in lod_items.cvs_branches: - self._adjust_branch_parents(cvs_branch) - - def _get_revision_source(self, cvs_symbol): - """Return the CVSRevision that is the ultimate source of CVS_SYMBOL.""" - - while True: - cvs_item = self[cvs_symbol.source_id] - if isinstance(cvs_item, CVSRevision): - return cvs_item - else: - cvs_symbol = cvs_item - - def refine_symbols(self): - """Refine the types of the CVSSymbols in this file. - - Adjust the symbol types based on whether the source exists: - CVSBranch vs. CVSBranchNoop and CVSTag vs. CVSTagNoop.""" - - for lod_items in self.iter_lods(): - for cvs_tag in lod_items.cvs_tags: - source = self._get_revision_source(cvs_tag) - cvs_tag.__class__ = cvs_tag_type_map[ - isinstance(source, CVSRevisionModification) - ] - - for cvs_branch in lod_items.cvs_branches: - source = self._get_revision_source(cvs_branch) - cvs_branch.__class__ = cvs_branch_type_map[ - isinstance(source, CVSRevisionModification) - ] - - def record_opened_symbols(self): - """Set CVSRevision.opened_symbols for the surviving revisions.""" - - for cvs_item in self.values(): - if isinstance(cvs_item, (CVSRevision, CVSBranch)): - cvs_item.opened_symbols = [] - for cvs_symbol_opened_id in cvs_item.get_cvs_symbol_ids_opened(): - cvs_symbol_opened = self[cvs_symbol_opened_id] - cvs_item.opened_symbols.append( - (cvs_symbol_opened.symbol.id, cvs_symbol_opened.id,) - ) - - def record_closed_symbols(self): - """Set CVSRevision.closed_symbols for the surviving revisions. - - A CVSRevision closes the symbols that were opened by the CVSItems - that the CVSRevision closes. Got it? - - This method must be called after record_opened_symbols().""" - - for cvs_item in self.values(): - if isinstance(cvs_item, CVSRevision): - cvs_item.closed_symbols = [] - for cvs_item_closed_id in cvs_item.get_ids_closed(): - cvs_item_closed = self[cvs_item_closed_id] - cvs_item.closed_symbols.extend(cvs_item_closed.opened_symbols) - - diff --git a/cvs2svn_lib/cvs_item.py b/cvs2svn_lib/cvs_item.py deleted file mode 100644 index 5c01a24..0000000 --- a/cvs2svn_lib/cvs_item.py +++ /dev/null @@ -1,901 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes to store atomic CVS events. - -A CVSItem is a single event, pertaining to a single file, that can be -determined to have occured based on the information in the CVS -repository. - -The inheritance tree is as follows: - -CVSItem -| -+--CVSRevision -| | -| +--CVSRevisionModification (* -> 'Exp') -| | | -| | +--CVSRevisionAdd ('dead' -> 'Exp') -| | | -| | +--CVSRevisionChange ('Exp' -> 'Exp') -| | -| +--CVSRevisionAbsent (* -> 'dead') -| | -| +--CVSRevisionDelete ('Exp' -> 'dead') -| | -| +--CVSRevisionNoop ('dead' -> 'dead') -| -+--CVSSymbol - | - +--CVSBranch - | | - | +--CVSBranchNoop - | - +--CVSTag - | - +--CVSTagNoop - -""" - - -from cvs2svn_lib.context import Ctx - - -class CVSItem(object): - __slots__ = [ - 'id', - 'cvs_file', - 'revision_recorder_token', - ] - - def __init__(self, id, cvs_file, revision_recorder_token): - self.id = id - self.cvs_file = cvs_file - self.revision_recorder_token = revision_recorder_token - - def __eq__(self, other): - return self.id == other.id - - def __cmp__(self, other): - return cmp(self.id, other.id) - - def __hash__(self): - return self.id - - def __getstate__(self): - raise NotImplementedError() - - def __setstate__(self, data): - raise NotImplementedError() - - def get_svn_path(self): - """Return the SVN path associated with this CVSItem.""" - - raise NotImplementedError() - - def get_pred_ids(self): - """Return the CVSItem.ids of direct predecessors of SELF. - - A predecessor is defined to be a CVSItem that has to have been - committed before this one.""" - - raise NotImplementedError() - - def get_succ_ids(self): - """Return the CVSItem.ids of direct successors of SELF. - - A direct successor is defined to be a CVSItem that has this one as - a direct predecessor.""" - - raise NotImplementedError() - - def get_cvs_symbol_ids_opened(self): - """Return an iterable over the ids of CVSSymbols that this item opens. - - The definition of 'open' is that the path corresponding to this - CVSItem will have to be copied when filling the corresponding - symbol.""" - - raise NotImplementedError() - - def get_ids_closed(self): - """Return an iterable over the CVSItem.ids of CVSItems closed by this one. - - A CVSItem A is said to close a CVSItem B if committing A causes B - to be overwritten or deleted (no longer available) in the SVN - repository. This is interesting because it sets the last SVN - revision number from which the contents of B can be copied (for - example, to fill a symbol). See the concrete implementations of - this method for the exact rules about what closes what.""" - - raise NotImplementedError() - - def check_links(self, cvs_file_items): - """Check for consistency of links to other CVSItems. - - Other items can be looked up in CVS_FILE_ITEMS, which is an - instance of CVSFileItems. Raise an AssertionError if there is a - problem.""" - - raise NotImplementedError() - - def __repr__(self): - return '%s(%s)' % (self.__class__.__name__, self,) - - -class CVSRevision(CVSItem): - """Information about a single CVS revision. - - A CVSRevision holds the information known about a single version of - a single file. - - Members: - - id -- (int) unique ID for this revision. - - cvs_file -- (CVSFile) CVSFile affected by this revision. - - timestamp -- (int) date stamp for this revision. - - metadata_id -- (int) id of metadata instance record in - metadata_db. - - prev_id -- (int) id of the logically previous CVSRevision, either - on the same or the source branch (or None). - - next_id -- (int) id of the logically next CVSRevision (or None). - - rev -- (string) the CVS revision number, e.g., '1.3'. - - deltatext_exists -- (bool) true iff this revision's deltatext is - not empty. - - lod -- (LineOfDevelopment) LOD on which this revision occurred. - - first_on_branch_id -- (int or None) if this revision is the first - on its branch, the cvs_branch_id of that branch; else, None. - - ntdbr -- (bool) true iff this is a non-trunk default branch - revision. - - ntdbr_prev_id -- (int or None) Iff this is the 1.2 revision after - the end of a default branch, the id of the last rev on the - default branch; else, None. - - ntdbr_next_id -- (int or None) Iff this is the last revision on a - default branch preceding a 1.2 rev, the id of the 1.2 - revision; else, None. - - tag_ids -- (list of int) ids of all CVSTags rooted at this - CVSRevision. - - branch_ids -- (list of int) ids of all CVSBranches rooted at this - CVSRevision. - - branch_commit_ids -- (list of int) ids of first CVSRevision - committed on each branch rooted in this revision (for branches - with commits). - - opened_symbols -- (None or list of (symbol_id, cvs_symbol_id) - tuples) information about all CVSSymbols opened by this - revision. This member is set in FilterSymbolsPass; before - then, it is None. - - closed_symbols -- (None or list of (symbol_id, cvs_symbol_id) - tuples) information about all CVSSymbols closed by this - revision. This member is set in FilterSymbolsPass; before - then, it is None. - - revision_recorder_token -- (arbitrary) a token that can be set by - RevisionRecorder for the later use of RevisionReader. - - """ - - __slots__ = [ - 'timestamp', - 'metadata_id', - 'prev_id', - 'next_id', - 'rev', - 'deltatext_exists', - 'lod', - 'first_on_branch_id', - 'ntdbr', - 'ntdbr_prev_id', - 'ntdbr_next_id', - 'tag_ids', - 'branch_ids', - 'branch_commit_ids', - 'opened_symbols', - 'closed_symbols', - ] - - def __init__(self, - id, cvs_file, - timestamp, metadata_id, - prev_id, next_id, - rev, deltatext_exists, - lod, first_on_branch_id, ntdbr, - ntdbr_prev_id, ntdbr_next_id, - tag_ids, branch_ids, branch_commit_ids, - revision_recorder_token): - """Initialize a new CVSRevision object.""" - - CVSItem.__init__(self, id, cvs_file, revision_recorder_token) - - self.timestamp = timestamp - self.metadata_id = metadata_id - self.prev_id = prev_id - self.next_id = next_id - self.rev = rev - self.deltatext_exists = deltatext_exists - self.lod = lod - self.first_on_branch_id = first_on_branch_id - self.ntdbr = ntdbr - self.ntdbr_prev_id = ntdbr_prev_id - self.ntdbr_next_id = ntdbr_next_id - self.tag_ids = tag_ids - self.branch_ids = branch_ids - self.branch_commit_ids = branch_commit_ids - self.opened_symbols = None - self.closed_symbols = None - - def _get_cvs_path(self): - return self.cvs_file.cvs_path - - cvs_path = property(_get_cvs_path) - - def get_svn_path(self): - return self.lod.get_path(self.cvs_file.cvs_path) - - def __getstate__(self): - """Return the contents of this instance, for pickling. - - The presence of this method improves the space efficiency of - pickling CVSRevision instances.""" - - return ( - self.id, self.cvs_file.id, - self.timestamp, self.metadata_id, - self.prev_id, self.next_id, - self.rev, - self.deltatext_exists, - self.lod.id, - self.first_on_branch_id, - self.ntdbr, - self.ntdbr_prev_id, self.ntdbr_next_id, - self.tag_ids, self.branch_ids, self.branch_commit_ids, - self.opened_symbols, self.closed_symbols, - self.revision_recorder_token, - ) - - def __setstate__(self, data): - (self.id, cvs_file_id, - self.timestamp, self.metadata_id, - self.prev_id, self.next_id, - self.rev, - self.deltatext_exists, - lod_id, - self.first_on_branch_id, - self.ntdbr, - self.ntdbr_prev_id, self.ntdbr_next_id, - self.tag_ids, self.branch_ids, self.branch_commit_ids, - self.opened_symbols, self.closed_symbols, - self.revision_recorder_token) = data - self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id) - self.lod = Ctx()._symbol_db.get_symbol(lod_id) - - def get_effective_prev_id(self): - """Return the ID of the effective predecessor of this item. - - This is the ID of the item that determines whether the object - existed before this CVSRevision.""" - - if self.ntdbr_prev_id is not None: - return self.ntdbr_prev_id - else: - return self.prev_id - - def get_symbol_pred_ids(self): - """Return the pred_ids for symbol predecessors.""" - - retval = set() - if self.first_on_branch_id is not None: - retval.add(self.first_on_branch_id) - return retval - - def get_pred_ids(self): - retval = self.get_symbol_pred_ids() - if self.prev_id is not None: - retval.add(self.prev_id) - if self.ntdbr_prev_id is not None: - retval.add(self.ntdbr_prev_id) - return retval - - def get_symbol_succ_ids(self): - """Return the succ_ids for symbol successors.""" - - retval = set() - for id in self.branch_ids + self.tag_ids: - retval.add(id) - return retval - - def get_succ_ids(self): - retval = self.get_symbol_succ_ids() - if self.next_id is not None: - retval.add(self.next_id) - if self.ntdbr_next_id is not None: - retval.add(self.ntdbr_next_id) - for id in self.branch_commit_ids: - retval.add(id) - return retval - - def get_ids_closed(self): - # Special handling is needed in the case of non-trunk default - # branches. The following cases have to be handled: - # - # Case 1: Revision 1.1 not deleted; revision 1.2 exists: - # - # 1.1 -----------------> 1.2 - # \ ^ ^ / - # \ | | / - # 1.1.1.1 -> 1.1.1.2 - # - # * 1.1.1.1 closes 1.1 (because its post-commit overwrites 1.1 - # on trunk) - # - # * 1.1.1.2 closes 1.1.1.1 - # - # * 1.2 doesn't close anything (the post-commit from 1.1.1.1 - # already closed 1.1, and no symbols can sprout from the - # post-commit of 1.1.1.2) - # - # Case 2: Revision 1.1 not deleted; revision 1.2 does not exist: - # - # 1.1 .................. - # \ ^ ^ - # \ | | - # 1.1.1.1 -> 1.1.1.2 - # - # * 1.1.1.1 closes 1.1 (because its post-commit overwrites 1.1 - # on trunk) - # - # * 1.1.1.2 closes 1.1.1.1 - # - # Case 3: Revision 1.1 deleted; revision 1.2 exists: - # - # ............... 1.2 - # ^ ^ / - # | | / - # 1.1.1.1 -> 1.1.1.2 - # - # * 1.1.1.1 doesn't close anything - # - # * 1.1.1.2 closes 1.1.1.1 - # - # * 1.2 doesn't close anything (no symbols can sprout from the - # post-commit of 1.1.1.2) - # - # Case 4: Revision 1.1 deleted; revision 1.2 doesn't exist: - # - # ............... - # ^ ^ - # | | - # 1.1.1.1 -> 1.1.1.2 - # - # * 1.1.1.1 doesn't close anything - # - # * 1.1.1.2 closes 1.1.1.1 - - if self.first_on_branch_id is not None: - # The first CVSRevision on a branch is considered to close the - # branch: - yield self.first_on_branch_id - if self.ntdbr: - # If the 1.1 revision was not deleted, the 1.1.1.1 revision is - # considered to close it: - yield self.prev_id - elif self.ntdbr_prev_id is not None: - # This is the special case of a 1.2 revision that follows a - # non-trunk default branch. Either 1.1 was deleted or the first - # default branch revision closed 1.1, so we don't have to close - # 1.1. Technically, we close the revision on trunk that was - # copied from the last non-trunk default branch revision in a - # post-commit, but for now no symbols can sprout from that - # revision so we ignore that one, too. - pass - elif self.prev_id is not None: - # Since this CVSRevision is not the first on a branch, its - # prev_id is on the same LOD and this item closes that one: - yield self.prev_id - - def _get_branch_ids_recursively(self, cvs_file_items): - """Return the set of all CVSBranches that sprout from this CVSRevision. - - After parent adjustment in FilterSymbolsPass, it is possible for - branches to sprout directly from a CVSRevision, or from those - branches, etc. Return all branches that sprout from this - CVSRevision, directly or indirectly.""" - - retval = set() - branch_ids_to_process = list(self.branch_ids) - while branch_ids_to_process: - branch = cvs_file_items[branch_ids_to_process.pop()] - retval.add(branch) - branch_ids_to_process.extend(branch.branch_ids) - - return retval - - def check_links(self, cvs_file_items): - assert self.cvs_file == cvs_file_items.cvs_file - - prev = cvs_file_items.get(self.prev_id) - next = cvs_file_items.get(self.next_id) - first_on_branch = cvs_file_items.get(self.first_on_branch_id) - ntdbr_next = cvs_file_items.get(self.ntdbr_next_id) - ntdbr_prev = cvs_file_items.get(self.ntdbr_prev_id) - effective_prev = cvs_file_items.get(self.get_effective_prev_id()) - - if prev is None: - # This is the first CVSRevision on trunk or a detached branch: - assert self.id in cvs_file_items.root_ids - elif first_on_branch is not None: - # This is the first CVSRevision on an existing branch: - assert isinstance(first_on_branch, CVSBranch) - assert first_on_branch.symbol == self.lod - assert first_on_branch.next_id == self.id - cvs_revision_source = first_on_branch.get_cvs_revision_source( - cvs_file_items - ) - assert cvs_revision_source.id == prev.id - assert self.id in prev.branch_commit_ids - else: - # This revision follows another revision on the same LOD: - assert prev.next_id == self.id - assert prev.lod == self.lod - - if next is not None: - assert next.prev_id == self.id - assert next.lod == self.lod - - if ntdbr_next is not None: - assert self.ntdbr - assert ntdbr_next.ntdbr_prev_id == self.id - - if ntdbr_prev is not None: - assert ntdbr_prev.ntdbr_next_id == self.id - - for tag_id in self.tag_ids: - tag = cvs_file_items[tag_id] - assert isinstance(tag, CVSTag) - assert tag.source_id == self.id - assert tag.source_lod == self.lod - - for branch_id in self.branch_ids: - branch = cvs_file_items[branch_id] - assert isinstance(branch, CVSBranch) - assert branch.source_id == self.id - assert branch.source_lod == self.lod - - branch_commit_ids = list(self.branch_commit_ids) - - for branch in self._get_branch_ids_recursively(cvs_file_items): - assert isinstance(branch, CVSBranch) - if branch.next_id is not None: - assert branch.next_id in branch_commit_ids - branch_commit_ids.remove(branch.next_id) - - assert not branch_commit_ids - - assert self.__class__ == cvs_revision_type_map[( - isinstance(self, CVSRevisionModification), - effective_prev is not None - and isinstance(effective_prev, CVSRevisionModification), - )] - - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return '%s:%s<%x>' % (self.cvs_file, self.rev, self.id,) - - -class CVSRevisionModification(CVSRevision): - """Base class for CVSRevisionAdd or CVSRevisionChange.""" - - __slots__ = [] - - def get_cvs_symbol_ids_opened(self): - return self.tag_ids + self.branch_ids - - -class CVSRevisionAdd(CVSRevisionModification): - """A CVSRevision that creates a file that previously didn't exist. - - The file might have never existed on this LOD, or it might have - existed previously but been deleted by a CVSRevisionDelete.""" - - __slots__ = [] - - -class CVSRevisionChange(CVSRevisionModification): - """A CVSRevision that modifies a file that already existed on this LOD.""" - - __slots__ = [] - - -class CVSRevisionAbsent(CVSRevision): - """A CVSRevision for which the file is nonexistent on this LOD.""" - - __slots__ = [] - - def get_cvs_symbol_ids_opened(self): - return [] - - -class CVSRevisionDelete(CVSRevisionAbsent): - """A CVSRevision that deletes a file that existed on this LOD.""" - - __slots__ = [] - - -class CVSRevisionNoop(CVSRevisionAbsent): - """A CVSRevision that doesn't do anything. - - The revision was 'dead' and the predecessor either didn't exist or - was also 'dead'. These revisions can't necessarily be thrown away - because (1) they impose ordering constraints on other items; (2) - they might have a nontrivial log message that we don't want to throw - away.""" - - __slots__ = [] - - -# A map -# -# {(nondead(cvs_rev), nondead(prev_cvs_rev)) : cvs_revision_subtype} -# -# , where nondead() means that the cvs revision exists and is not -# 'dead', and CVS_REVISION_SUBTYPE is the subtype of CVSRevision that -# should be used for CVS_REV. -cvs_revision_type_map = { - (False, False) : CVSRevisionNoop, - (False, True) : CVSRevisionDelete, - (True, False) : CVSRevisionAdd, - (True, True) : CVSRevisionChange, - } - - -class CVSSymbol(CVSItem): - """Represent a symbol on a particular CVSFile. - - This is the base class for CVSBranch and CVSTag. - - Members: - - id -- (int) unique ID for this item. - - cvs_file -- (CVSFile) CVSFile affected by this item. - - symbol -- (Symbol) the symbol affected by this CVSSymbol. - - source_lod -- (LineOfDevelopment) the LOD that is the source for - this CVSSymbol. - - source_id -- (int) the ID of the CVSRevision or CVSBranch that is - the source for this item. This initially points to a - CVSRevision, but can be changed to a CVSBranch via parent - adjustment in FilterSymbolsPass. - - revision_recorder_token -- (arbitrary) a token that can be set by - RevisionRecorder for the later use of RevisionReader. - - """ - - __slots__ = [ - 'symbol', - 'source_lod', - 'source_id', - ] - - def __init__( - self, id, cvs_file, symbol, source_lod, source_id, - revision_recorder_token - ): - """Initialize a CVSSymbol object.""" - - CVSItem.__init__(self, id, cvs_file, revision_recorder_token) - - self.symbol = symbol - self.source_lod = source_lod - self.source_id = source_id - - def get_cvs_revision_source(self, cvs_file_items): - """Return the CVSRevision that is the ultimate source of this symbol.""" - - cvs_source = cvs_file_items[self.source_id] - while not isinstance(cvs_source, CVSRevision): - cvs_source = cvs_file_items[cvs_source.source_id] - - return cvs_source - - def get_svn_path(self): - return self.symbol.get_path(self.cvs_file.cvs_path) - - def get_ids_closed(self): - # A Symbol does not close any other CVSItems: - return [] - - -class CVSBranch(CVSSymbol): - """Represent the creation of a branch in a particular CVSFile. - - Members: - - id -- (int) unique ID for this item. - - cvs_file -- (CVSFile) CVSFile affected by this item. - - symbol -- (Symbol) the symbol affected by this CVSSymbol. - - branch_number -- (string) the number of this branch (e.g., - '1.3.4'), or None if this is a converted CVSTag. - - source_lod -- (LineOfDevelopment) the LOD that is the source for - this CVSSymbol. - - source_id -- (int) id of the CVSRevision or CVSBranch from which - this branch sprouts. This initially points to a CVSRevision, - but can be changed to a CVSBranch via parent adjustment in - FilterSymbolsPass. - - next_id -- (int or None) id of first CVSRevision on this branch, - if any; else, None. - - tag_ids -- (list of int) ids of all CVSTags rooted at this - CVSBranch (can be set due to parent adjustment in - FilterSymbolsPass). - - branch_ids -- (list of int) ids of all CVSBranches rooted at this - CVSBranch (can be set due to parent adjustment in - FilterSymbolsPass). - - opened_symbols -- (None or list of (symbol_id, cvs_symbol_id) - tuples) information about all CVSSymbols opened by this - branch. This member is set in FilterSymbolsPass; before then, - it is None. - - revision_recorder_token -- (arbitrary) a token that can be set by - RevisionRecorder for the later use of RevisionReader. - - """ - - __slots__ = [ - 'branch_number', - 'next_id', - 'tag_ids', - 'branch_ids', - 'opened_symbols', - ] - - def __init__( - self, id, cvs_file, symbol, branch_number, - source_lod, source_id, next_id, - revision_recorder_token, - ): - """Initialize a CVSBranch.""" - - CVSSymbol.__init__( - self, id, cvs_file, symbol, source_lod, source_id, - revision_recorder_token - ) - self.branch_number = branch_number - self.next_id = next_id - self.tag_ids = [] - self.branch_ids = [] - self.opened_symbols = None - - def __getstate__(self): - return ( - self.id, self.cvs_file.id, - self.symbol.id, self.branch_number, - self.source_lod.id, self.source_id, self.next_id, - self.tag_ids, self.branch_ids, - self.opened_symbols, - self.revision_recorder_token, - ) - - def __setstate__(self, data): - ( - self.id, cvs_file_id, - symbol_id, self.branch_number, - source_lod_id, self.source_id, self.next_id, - self.tag_ids, self.branch_ids, - self.opened_symbols, - self.revision_recorder_token, - ) = data - self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id) - self.symbol = Ctx()._symbol_db.get_symbol(symbol_id) - self.source_lod = Ctx()._symbol_db.get_symbol(source_lod_id) - - def get_pred_ids(self): - return set([self.source_id]) - - def get_succ_ids(self): - retval = set(self.tag_ids + self.branch_ids) - if self.next_id is not None: - retval.add(self.next_id) - return retval - - def get_cvs_symbol_ids_opened(self): - return self.tag_ids + self.branch_ids - - def check_links(self, cvs_file_items): - source = cvs_file_items.get(self.source_id) - next = cvs_file_items.get(self.next_id) - - assert self.id in source.branch_ids - if isinstance(source, CVSRevision): - assert self.source_lod == source.lod - elif isinstance(source, CVSBranch): - assert self.source_lod == source.symbol - else: - assert False - - if next is not None: - assert isinstance(next, CVSRevision) - assert next.lod == self.symbol - assert next.first_on_branch_id == self.id - - for tag_id in self.tag_ids: - tag = cvs_file_items[tag_id] - assert isinstance(tag, CVSTag) - assert tag.source_id == self.id - assert tag.source_lod == self.symbol - - for branch_id in self.branch_ids: - branch = cvs_file_items[branch_id] - assert isinstance(branch, CVSBranch) - assert branch.source_id == self.id - assert branch.source_lod == self.symbol - - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return '%s:%s:%s<%x>' \ - % (self.cvs_file, self.symbol, self.branch_number, self.id,) - - -class CVSBranchNoop(CVSBranch): - """A CVSBranch whose source is a CVSRevisionAbsent.""" - - __slots__ = [] - - def get_cvs_symbol_ids_opened(self): - return [] - - -# A map -# -# {nondead(source_cvs_rev) : cvs_branch_subtype} -# -# , where nondead() means that the cvs revision exists and is not -# 'dead', and CVS_BRANCH_SUBTYPE is the subtype of CVSBranch that -# should be used. -cvs_branch_type_map = { - False : CVSBranchNoop, - True : CVSBranch, - } - - -class CVSTag(CVSSymbol): - """Represent the creation of a tag on a particular CVSFile. - - Members: - - id -- (int) unique ID for this item. - - cvs_file -- (CVSFile) CVSFile affected by this item. - - symbol -- (Symbol) the symbol affected by this CVSSymbol. - - source_lod -- (LineOfDevelopment) the LOD that is the source for - this CVSSymbol. - - source_id -- (int) the ID of the CVSRevision or CVSBranch that is - being tagged. This initially points to a CVSRevision, but can - be changed to a CVSBranch via parent adjustment in - FilterSymbolsPass. - - revision_recorder_token -- (arbitrary) a token that can be set by - RevisionRecorder for the later use of RevisionReader. - - """ - - __slots__ = [] - - def __init__( - self, id, cvs_file, symbol, source_lod, source_id, - revision_recorder_token, - ): - """Initialize a CVSTag.""" - - CVSSymbol.__init__( - self, id, cvs_file, symbol, source_lod, source_id, - revision_recorder_token, - ) - - def __getstate__(self): - return ( - self.id, self.cvs_file.id, self.symbol.id, - self.source_lod.id, self.source_id, - self.revision_recorder_token, - ) - - def __setstate__(self, data): - ( - self.id, cvs_file_id, symbol_id, source_lod_id, self.source_id, - self.revision_recorder_token, - ) = data - self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id) - self.symbol = Ctx()._symbol_db.get_symbol(symbol_id) - self.source_lod = Ctx()._symbol_db.get_symbol(source_lod_id) - - def get_pred_ids(self): - return set([self.source_id]) - - def get_succ_ids(self): - return set() - - def get_cvs_symbol_ids_opened(self): - return [] - - def check_links(self, cvs_file_items): - source = cvs_file_items.get(self.source_id) - - assert self.id in source.tag_ids - if isinstance(source, CVSRevision): - assert self.source_lod == source.lod - elif isinstance(source, CVSBranch): - assert self.source_lod == source.symbol - else: - assert False - - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return '%s:%s<%x>' \ - % (self.cvs_file, self.symbol, self.id,) - - -class CVSTagNoop(CVSTag): - """A CVSTag whose source is a CVSRevisionAbsent.""" - - __slots__ = [] - - -# A map -# -# {nondead(source_cvs_rev) : cvs_tag_subtype} -# -# , where nondead() means that the cvs revision exists and is not -# 'dead', and CVS_TAG_SUBTYPE is the subtype of CVSTag that should be -# used. -cvs_tag_type_map = { - False : CVSTagNoop, - True : CVSTag, - } - - diff --git a/cvs2svn_lib/cvs_item_database.py b/cvs2svn_lib/cvs_item_database.py deleted file mode 100644 index f072252..0000000 --- a/cvs2svn_lib/cvs_item_database.py +++ /dev/null @@ -1,248 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains a database that can store arbitrary CVSItems.""" - - -import re -import cPickle - -from cvs2svn_lib.cvs_item import CVSRevisionAdd -from cvs2svn_lib.cvs_item import CVSRevisionChange -from cvs2svn_lib.cvs_item import CVSRevisionDelete -from cvs2svn_lib.cvs_item import CVSRevisionNoop -from cvs2svn_lib.cvs_item import CVSBranch -from cvs2svn_lib.cvs_item import CVSBranchNoop -from cvs2svn_lib.cvs_item import CVSTag -from cvs2svn_lib.cvs_item import CVSTagNoop -from cvs2svn_lib.cvs_file_items import CVSFileItems -from cvs2svn_lib.serializer import Serializer -from cvs2svn_lib.serializer import PrimedPickleSerializer -from cvs2svn_lib.database import IndexedStore - - -cvs_item_primer = ( - CVSRevisionAdd, CVSRevisionChange, - CVSRevisionDelete, CVSRevisionNoop, - CVSBranch, CVSBranchNoop, - CVSTag, CVSTagNoop, - ) - - -class NewCVSItemStore: - """A file of sequential CVSItems, grouped by CVSFile. - - The file consists of a sequence of pickles. The zeroth one is a - Serializer as described in the serializer module. Subsequent ones - are pickled lists of CVSItems, each list containing all of the - CVSItems for a single file. - - We don't use a single pickler for all items because the memo would - grow too large.""" - - def __init__(self, filename): - """Initialize an instance, creating the file and writing the primer.""" - - self.f = open(filename, 'wb') - - self.serializer = PrimedPickleSerializer( - cvs_item_primer + (CVSFileItems,) - ) - cPickle.dump(self.serializer, self.f, -1) - - def add(self, cvs_file_items): - """Write CVS_FILE_ITEMS into the database.""" - - self.serializer.dumpf(self.f, cvs_file_items) - - def close(self): - self.f.close() - self.f = None - - -class OldCVSItemStore: - """Read a file created by NewCVSItemStore. - - The file must be read sequentially, one CVSFileItems instance at a - time.""" - - def __init__(self, filename): - self.f = open(filename, 'rb') - - # Read the memo from the first pickle: - self.serializer = cPickle.load(self.f) - - def iter_cvs_file_items(self): - """Iterate through the CVSFileItems instances, one file at a time. - - Each time yield a CVSFileItems instance for one CVSFile.""" - - try: - while True: - yield self.serializer.loadf(self.f) - except EOFError: - return - - def close(self): - self.f.close() - self.f = None - - -class LinewiseSerializer(Serializer): - """A serializer that writes exactly one line for each object. - - The actual serialization is done by a wrapped serializer; this class - only escapes any newlines in the serialized data then appends a - single newline.""" - - def __init__(self, wrapee): - self.wrapee = wrapee - - @staticmethod - def _encode_newlines(s): - """Return s with newlines and backslashes encoded. - - The string is returned with the following character transformations: - - LF -> \n - CR -> \r - ^Z -> \z (needed for Windows) - \ -> \\ - - """ - - return s.replace('\\', '\\\\') \ - .replace('\n', '\\n') \ - .replace('\r', '\\r') \ - .replace('\x1a', '\\z') - - _escape_re = re.compile(r'(\\\\|\\n|\\r|\\z)') - _subst = {'\\n' : '\n', '\\r' : '\r', '\\z' : '\x1a', '\\\\' : '\\'} - - @staticmethod - def _decode_newlines(s): - """Return s with newlines and backslashes decoded. - - This function reverses the encoding of _encode_newlines(). - - """ - - def repl(m): - return LinewiseSerializer._subst[m.group(1)] - - return LinewiseSerializer._escape_re.sub(repl, s) - - def dumpf(self, f, object): - f.write(self.dumps(object)) - - def dumps(self, object): - return self._encode_newlines(self.wrapee.dumps(object)) + '\n' - - def loadf(self, f): - return self.loads(f.readline()) - - def loads(self, s): - return self.wrapee.loads(self._decode_newlines(s[:-1])) - - -class NewSortableCVSRevisionDatabase(object): - """A serially-accessible, sortable file for holding CVSRevisions. - - This class creates such files.""" - - def __init__(self, filename, serializer): - self.f = open(filename, 'w') - self.serializer = LinewiseSerializer(serializer) - - def add(self, cvs_rev): - self.f.write( - '%x %08x %s' % ( - cvs_rev.metadata_id, cvs_rev.timestamp, - self.serializer.dumps(cvs_rev), - ) - ) - - def close(self): - self.f.close() - self.f = None - - -class OldSortableCVSRevisionDatabase(object): - """A serially-accessible, sortable file for holding CVSRevisions. - - This class reads such files.""" - - def __init__(self, filename, serializer): - self.filename = filename - self.serializer = LinewiseSerializer(serializer) - - def __iter__(self): - f = open(self.filename, 'r') - for l in f: - s = l.split(' ', 2)[-1] - yield self.serializer.loads(s) - f.close() - - def close(self): - pass - - -class NewSortableCVSSymbolDatabase(object): - """A serially-accessible, sortable file for holding CVSSymbols. - - This class creates such files.""" - - def __init__(self, filename, serializer): - self.f = open(filename, 'w') - self.serializer = LinewiseSerializer(serializer) - - def add(self, cvs_symbol): - self.f.write( - '%x %s' % (cvs_symbol.symbol.id, self.serializer.dumps(cvs_symbol)) - ) - - def close(self): - self.f.close() - self.f = None - - -class OldSortableCVSSymbolDatabase(object): - """A serially-accessible, sortable file for holding CVSSymbols. - - This class reads such files.""" - - def __init__(self, filename, serializer): - self.filename = filename - self.serializer = LinewiseSerializer(serializer) - - def __iter__(self): - f = open(self.filename, 'r') - for l in f: - s = l.split(' ', 1)[-1] - yield self.serializer.loads(s) - f.close() - - def close(self): - pass - - -def IndexedCVSItemStore(filename, index_filename, mode): - return IndexedStore( - filename, index_filename, mode, - PrimedPickleSerializer(cvs_item_primer) - ) - - diff --git a/cvs2svn_lib/cvs_revision_manager.py b/cvs2svn_lib/cvs_revision_manager.py deleted file mode 100644 index 6f5de3b..0000000 --- a/cvs2svn_lib/cvs_revision_manager.py +++ /dev/null @@ -1,85 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Access the CVS repository via CVS's 'cvs' command.""" - - -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.process import check_command_runs -from cvs2svn_lib.process import PipeStream -from cvs2svn_lib.process import CommandFailedException -from cvs2svn_lib.revision_manager import RevisionReader - - -class CVSRevisionReader(RevisionReader): - """A RevisionReader that reads the contents via CVS.""" - - # Different versions of CVS support different global arguments. - # Here are the global arguments that we try to use, in order of - # decreasing preference: - _possible_global_arguments = [ - ['-q', '-R', '-f'], - ['-q', '-R'], - ['-q', '-f'], - ['-q'], - ] - - def __init__(self, cvs_executable): - self.cvs_executable = cvs_executable - - for global_arguments in self._possible_global_arguments: - try: - self._check_cvs_runs(global_arguments) - except CommandFailedException, e: - pass - else: - # Those global arguments were OK; use them for all CVS invocations. - self.global_arguments = global_arguments - break - else: - raise FatalError( - '%s\n' - 'Please check that cvs is installed and in your PATH.' % (e,) - ) - - def _check_cvs_runs(self, global_arguments): - """Check that CVS can be started. - - Try running 'cvs --version' with the current setting for - self.cvs_executable and the specified global_arguments. If not - successful, raise a CommandFailedException.""" - - check_command_runs( - [self.cvs_executable] + global_arguments + ['--version'], - self.cvs_executable, - ) - - def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False): - project = cvs_rev.cvs_file.project - pipe_cmd = [ - self.cvs_executable - ] + self.global_arguments + [ - '-d', project.cvs_repository_root, - 'co', - '-r' + cvs_rev.rev, - '-p' - ] - if suppress_keyword_substitution: - pipe_cmd.append('-kk') - pipe_cmd.append(project.cvs_module + cvs_rev.cvs_path) - return PipeStream(pipe_cmd) - - diff --git a/cvs2svn_lib/database.py b/cvs2svn_lib/database.py deleted file mode 100644 index 9db9be2..0000000 --- a/cvs2svn_lib/database.py +++ /dev/null @@ -1,322 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains database facilities used by cvs2svn.""" - - -import sys -import os -import cPickle - -from cvs2svn_lib.common import DB_OPEN_READ -from cvs2svn_lib.common import DB_OPEN_WRITE -from cvs2svn_lib.common import DB_OPEN_NEW -from cvs2svn_lib.common import warning_prefix -from cvs2svn_lib.common import error_prefix -from cvs2svn_lib.log import Log -from cvs2svn_lib.record_table import FileOffsetPacker -from cvs2svn_lib.record_table import RecordTable - - -# DBM module selection - -# 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3, -# so that the dbhash module used by anydbm will use bsddb3. -try: - import bsddb3 - sys.modules['bsddb'] = sys.modules['bsddb3'] -except ImportError: - pass - -# 2. These DBM modules are not good for cvs2svn. -import anydbm -if anydbm._defaultmod.__name__ in ['dumbdbm', 'dbm']: - Log().error( - '%s: cvs2svn uses the anydbm package, which depends on lower level ' - 'dbm\n' - 'libraries. Your system has %s, with which cvs2svn is known to have\n' - 'problems. To use cvs2svn, you must install a Python dbm library ' - 'other than\n' - 'dumbdbm or dbm. See ' - 'http://python.org/doc/current/lib/module-anydbm.html\n' - 'for more information.\n' - % (error_prefix, anydbm._defaultmod.__name__,) - ) - sys.exit(1) - -# 3. If we are using the old bsddb185 module, then try prefer gdbm instead. -# Unfortunately, gdbm appears not to be trouble free, either. -if hasattr(anydbm._defaultmod, 'bsddb') \ - and not hasattr(anydbm._defaultmod.bsddb, '__version__'): - try: - gdbm = __import__('gdbm') - except ImportError: - Log().warn( - '%s: The version of the bsddb module found on your computer ' - 'has been\n' - 'reported to malfunction on some datasets, causing KeyError ' - 'exceptions.\n' - % (warning_prefix,) - ) - else: - anydbm._defaultmod = gdbm - - -class Database: - """A database that uses a Serializer to store objects of a certain type. - - The serializer is stored in the database under the key - self.serializer_key. (This implies that self.serializer_key may not - be used as a key for normal entries.) - - The backing database is an anydbm-based DBM. - - """ - - serializer_key = '_.%$1\t;_ ' - - def __init__(self, filename, mode, serializer=None): - """Constructor. - - The database stores its Serializer, so none needs to be supplied - when opening an existing database.""" - - # pybsddb3 has a bug which prevents it from working with - # Berkeley DB 4.2 if you open the db with 'n' ("new"). This - # causes the DB_TRUNCATE flag to be passed, which is disallowed - # for databases protected by lock and transaction support - # (bsddb databases use locking from bsddb version 4.2.4 onwards). - # - # Therefore, manually perform the removal (we can do this, because - # we know that for bsddb - but *not* anydbm in general - the database - # consists of one file with the name we specify, rather than several - # based on that name). - if mode == DB_OPEN_NEW and anydbm._defaultmod.__name__ == 'dbhash': - if os.path.isfile(filename): - os.unlink(filename) - self.db = anydbm.open(filename, 'c') - else: - self.db = anydbm.open(filename, mode) - - # Import implementations for many mapping interface methods. - for meth_name in ('__delitem__', - '__iter__', 'has_key', '__contains__', 'iterkeys', 'clear'): - meth_ref = getattr(self.db, meth_name, None) - if meth_ref: - setattr(self, meth_name, meth_ref) - - if mode == DB_OPEN_NEW: - self.serializer = serializer - self.db[self.serializer_key] = cPickle.dumps(self.serializer) - else: - self.serializer = cPickle.loads(self.db[self.serializer_key]) - - def __getitem__(self, key): - return self.serializer.loads(self.db[key]) - - def __setitem__(self, key, value): - self.db[key] = self.serializer.dumps(value) - - def __delitem__(self, key): - # gdbm defines a __delitem__ method, but it cannot be assigned. So - # this method provides a fallback definition via explicit delegation: - del self.db[key] - - def keys(self): - retval = self.db.keys() - retval.remove(self.serializer_key) - return retval - - def __iter__(self): - for key in self.keys(): - yield key - - def has_key(self, key): - try: - self.db[key] - return True - except KeyError: - return False - - def __contains__(self, key): - return self.has_key(key) - - def iterkeys(self): - return self.__iter__() - - def clear(self): - for key in self.keys(): - del self[key] - - def items(self): - return [(key, self[key],) for key in self.keys()] - - def values(self): - return [self[key] for key in self.keys()] - - def get(self, key, default=None): - try: - return self[key] - except KeyError: - return default - - def close(self): - self.db.close() - self.db = None - - -class IndexedDatabase: - """A file of objects that are written sequentially and read randomly. - - The objects are indexed by small non-negative integers, and a - RecordTable is used to store the index -> fileoffset map. - fileoffset=0 is used to represent an empty record. (An offset of 0 - cannot occur for a legitimate record because the serializer is - written there.) - - The main file consists of a sequence of pickles (or other serialized - data format). The zeroth record is a pickled Serializer. - Subsequent ones are objects serialized using the serializer. The - offset of each object in the file is stored to an index table so - that the data can later be retrieved randomly. - - Objects are always stored to the end of the file. If an object is - deleted or overwritten, the fact is recorded in the index_table but - the space in the pickle file is not garbage collected. This has the - advantage that one can create a modified version of a database that - shares the main data file with an old version by copying the index - file. But it has the disadvantage that space is wasted whenever - objects are written multiple times.""" - - def __init__(self, filename, index_filename, mode, serializer=None): - """Initialize an IndexedDatabase, writing the serializer if necessary. - - SERIALIZER is only used if MODE is DB_OPEN_NEW; otherwise the - serializer is read from the file.""" - - self.filename = filename - self.index_filename = index_filename - self.mode = mode - if self.mode == DB_OPEN_NEW: - self.f = open(self.filename, 'wb+') - elif self.mode == DB_OPEN_WRITE: - self.f = open(self.filename, 'rb+') - elif self.mode == DB_OPEN_READ: - self.f = open(self.filename, 'rb') - else: - raise RuntimeError('Invalid mode %r' % self.mode) - - self.index_table = RecordTable( - self.index_filename, self.mode, FileOffsetPacker() - ) - - if self.mode == DB_OPEN_NEW: - assert serializer is not None - self.serializer = serializer - cPickle.dump(self.serializer, self.f, -1) - else: - # Read the memo from the first pickle: - self.serializer = cPickle.load(self.f) - - # Seek to the end of the file, and record that position: - self.f.seek(0, 2) - self.fp = self.f.tell() - self.eofp = self.fp - - def __setitem__(self, index, item): - """Write ITEM into the database indexed by INDEX.""" - - # Make sure we're at the end of the file: - if self.fp != self.eofp: - self.f.seek(self.eofp) - self.index_table[index] = self.eofp - s = self.serializer.dumps(item) - self.f.write(s) - self.eofp += len(s) - self.fp = self.eofp - - def _fetch(self, offset): - if self.fp != offset: - self.f.seek(offset) - - # There is no easy way to tell how much data will be read, so just - # indicate that we don't know the current file pointer: - self.fp = None - - return self.serializer.loadf(self.f) - - def iterkeys(self): - return self.index_table.iterkeys() - - def itervalues(self): - for offset in self.index_table.itervalues(): - yield self._fetch(offset) - - def __getitem__(self, index): - offset = self.index_table[index] - return self._fetch(offset) - - def get(self, item, default=None): - try: - return self[item] - except KeyError: - return default - - def get_many(self, indexes, default=None): - """Yield (index,item) tuples for INDEXES, in arbitrary order. - - Yield (index,default) for indexes with no defined values.""" - - offsets = [] - for (index, offset) in self.index_table.get_many(indexes): - if offset is None: - yield (index, default) - else: - offsets.append((offset, index)) - - # Sort the offsets to reduce disk seeking: - offsets.sort() - for (offset,index) in offsets: - yield (index, self._fetch(offset)) - - def __delitem__(self, index): - # We don't actually free the data in self.f. - del self.index_table[index] - - def close(self): - self.index_table.close() - self.index_table = None - self.f.close() - self.f = None - - def __str__(self): - return 'IndexedDatabase(%r)' % (self.filename,) - - -class IndexedStore(IndexedDatabase): - """A file of items that is written sequentially and read randomly. - - This is just like IndexedDatabase, except that it has an additional - add() method which assumes that the object to be written to the - database has an 'id' member, which is used as its database index. - See IndexedDatabase for more information.""" - - def add(self, item): - """Write ITEM into the database indexed by ITEM.id.""" - - self[item.id] = item - - diff --git a/cvs2svn_lib/dumpfile_delegate.py b/cvs2svn_lib/dumpfile_delegate.py deleted file mode 100644 index 092cfca..0000000 --- a/cvs2svn_lib/dumpfile_delegate.py +++ /dev/null @@ -1,510 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains database facilities used by cvs2svn.""" - - -try: - from hashlib import md5 -except ImportError: - from md5 import new as md5 - - -from cvs2svn_lib import config -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.common import path_split -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.cvs_file import CVSDirectory -from cvs2svn_lib.cvs_file import CVSFile -from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate -from cvs2svn_lib.apple_single_filter import get_maybe_apple_single_stream - - -# Things that can happen to a file. -OP_ADD = 'add' -OP_CHANGE = 'change' - - -class DumpfileDelegate(SVNRepositoryDelegate): - """Create a Subversion dumpfile.""" - - def __init__(self, revision_reader, dumpfile_path): - """Return a new DumpfileDelegate instance, attached to a dumpfile - DUMPFILE_PATH, using Ctx().cvs_filename_decoder().""" - - self._revision_reader = revision_reader - self.dumpfile_path = dumpfile_path - - self.dumpfile = open(self.dumpfile_path, 'wb') - self._write_dumpfile_header(self.dumpfile) - - # A set of the basic project infrastructure project directories - # that have been created so far, as SVN paths. (The root - # directory is considered to be present at initialization.) This - # includes all of the LOD paths, and all of their parent - # directories etc. - self._basic_directories = set(['']) - - def _write_dumpfile_header(self, dumpfile): - # Initialize the dumpfile with the standard headers. - # - # Since the CVS repository doesn't have a UUID, and the Subversion - # repository will be created with one anyway, we don't specify a - # UUID in the dumpflie - dumpfile.write('SVN-fs-dump-format-version: 2\n\n') - - def _utf8_path(self, path): - """Return a copy of PATH encoded in UTF-8.""" - - # Convert each path component separately (as they may each use - # different encodings). - try: - return '/'.join([ - Ctx().cvs_filename_decoder(piece).encode('utf8') - for piece in path.split('/') - ]) - except UnicodeError: - raise FatalError( - "Unable to convert a path '%s' to internal encoding.\n" - "Consider rerunning with one or more '--encoding' parameters or\n" - "with '--fallback-encoding'." - % (path,)) - - def _string_for_prop(self, name, value): - """Return a property in the form needed for the dumpfile.""" - - return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value) - - def start_commit(self, revnum, revprops): - """Emit the start of SVN_COMMIT (an SVNCommit).""" - - self.revision = revnum - - # The start of a new commit typically looks like this: - # - # Revision-number: 1 - # Prop-content-length: 129 - # Content-length: 129 - # - # K 7 - # svn:log - # V 27 - # Log message for revision 1. - # K 10 - # svn:author - # V 7 - # jrandom - # K 8 - # svn:date - # V 27 - # 2003-04-22T22:57:58.132837Z - # PROPS-END - # - # Notice that the length headers count everything -- not just the - # length of the data but also the lengths of the lengths, including - # the 'K ' or 'V ' prefixes. - # - # The reason there are both Prop-content-length and Content-length - # is that the former includes just props, while the latter includes - # everything. That's the generic header form for any entity in a - # dumpfile. But since revisions only have props, the two lengths - # are always the same for revisions. - - # Calculate the output needed for the property definitions. - prop_names = revprops.keys() - prop_names.sort() - prop_strings = [] - for propname in prop_names: - if revprops[propname] is not None: - prop_strings.append( - self._string_for_prop(propname, revprops[propname])) - - all_prop_strings = ''.join(prop_strings) + 'PROPS-END\n' - total_len = len(all_prop_strings) - - # Print the revision header and revprops - self.dumpfile.write( - 'Revision-number: %d\n' - 'Prop-content-length: %d\n' - 'Content-length: %d\n' - '\n' - '%s' - '\n' - % (self.revision, total_len, total_len, all_prop_strings) - ) - - def end_commit(self): - pass - - def _make_any_dir(self, path): - """Emit the creation of directory PATH.""" - - self.dumpfile.write( - "Node-path: %s\n" - "Node-kind: dir\n" - "Node-action: add\n" - "\n" - "\n" - % self._utf8_path(path) - ) - - def _register_basic_directory(self, path, create): - """Register the creation of PATH if it is not already there. - - Create any parent directories that do not already exist. If - CREATE is set, also create PATH if it doesn't already exist. This - method should only be used for the LOD paths and the directories - containing them, not for directories within an LOD path.""" - - if path not in self._basic_directories: - # Make sure that the parent directory is present: - self._register_basic_directory(path_split(path)[0], True) - if create: - self._make_any_dir(path) - self._basic_directories.add(path) - - def initialize_project(self, project): - """Create any initial directories for the project. - - The trunk, tags, and branches directories directories are created - the first time the project is seen. Be sure not to create parent - directories that already exist (e.g., because two directories - share part of their paths either within or across projects).""" - - for path in project.get_initial_directories(): - self._register_basic_directory(path, True) - - def initialize_lod(self, lod): - lod_path = lod.get_path() - if lod_path: - self._register_basic_directory(lod_path, True) - - def mkdir(self, lod, cvs_directory): - self._make_any_dir(lod.get_path(cvs_directory.cvs_path)) - - def _add_or_change_path(self, s_item, op): - """Emit the addition or change corresponding to S_ITEM. - - OP is either the constant OP_ADD or OP_CHANGE.""" - - assert op in [OP_ADD, OP_CHANGE] - - # Convenience variables - cvs_rev = s_item.cvs_rev - - # The property handling here takes advantage of an undocumented - # but IMHO consistent feature of the Subversion dumpfile-loading - # code. When a node's properties aren't mentioned (that is, the - # "Prop-content-length:" header is absent, no properties are - # listed at all, and there is no "PROPS-END\n" line) then no - # change is made to the node's properties. - # - # This is consistent with the way dumpfiles behave w.r.t. text - # content changes, so I'm comfortable relying on it. If you - # commit a change to *just* the properties of some node that - # already has text contents from a previous revision, then in the - # dumpfile output for the prop change, no "Text-content-length:" - # nor "Text-content-md5:" header will be present, and the text of - # the file will not be given. But this does not cause the file's - # text to be erased! It simply remains unchanged. - # - # This works out great for cvs2svn, due to lucky coincidences: - # - # For files, the only properties we ever set are set in the first - # revision; all other revisions (including on branches) inherit - # from that. After the first revision, we never change file - # properties, therefore, there is no need to remember the full set - # of properties on a given file once we've set it. - # - # For directories, the only property we set is "svn:ignore", and - # while we may change it after the first revision, we always do so - # based on the contents of a ".cvsignore" file -- in other words, - # CVS is doing the remembering for us, so we still don't have to - # preserve the previous value of the property ourselves. - - # Calculate the (sorted-by-name) property string and length, if any. - if s_item.svn_props_changed: - svn_props = s_item.svn_props - prop_contents = '' - prop_names = svn_props.keys() - prop_names.sort() - for pname in prop_names: - pvalue = svn_props[pname] - if pvalue is not None: - prop_contents += self._string_for_prop(pname, pvalue) - prop_contents += 'PROPS-END\n' - props_header = 'Prop-content-length: %d\n' % len(prop_contents) - else: - prop_contents = '' - props_header = '' - - # If the file has keywords, we must prevent CVS/RCS from expanding - # the keywords because they must be unexpanded in the repository, - # or Subversion will get confused. - stream = self._revision_reader.get_content_stream( - cvs_rev, suppress_keyword_substitution=s_item.has_keywords() - ) - - if Ctx().decode_apple_single: - # Insert a filter to decode any files that are in AppleSingle - # format: - stream = get_maybe_apple_single_stream(stream) - - # Insert a filter to convert all EOLs to LFs if neccessary - - eol_style = s_item.svn_props.get('svn:eol-style', None) - if eol_style: - stream = LF_EOL_Filter(stream, eol_style) - - buf = None - - # treat .cvsignore as a directory property - dir_path, basename = path_split(cvs_rev.get_svn_path()) - if basename == '.cvsignore': - buf = stream.read() - ignore_vals = generate_ignores(buf) - ignore_contents = '\n'.join(ignore_vals) - if ignore_contents: - ignore_contents += '\n' - ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \ - (len(ignore_contents), ignore_contents)) - ignore_contents += 'PROPS-END\n' - ignore_len = len(ignore_contents) - - # write headers, then props - self.dumpfile.write( - 'Node-path: %s\n' - 'Node-kind: dir\n' - 'Node-action: change\n' - 'Prop-content-length: %d\n' - 'Content-length: %d\n' - '\n' - '%s' - % (self._utf8_path(dir_path), - ignore_len, ignore_len, ignore_contents) - ) - if not Ctx().keep_cvsignore: - stream.close() - return - - self.dumpfile.write( - 'Node-path: %s\n' - 'Node-kind: file\n' - 'Node-action: %s\n' - '%s' # no property header if no props - % (self._utf8_path(cvs_rev.get_svn_path()), op, props_header) - ) - - pos = self.dumpfile.tell() - - content_header_fmt = ( - 'Text-content-length: %16d\n' - 'Text-content-md5: %32s\n' - 'Content-length: %16d\n' - '\n' - ) - - self.dumpfile.write(content_header_fmt % (0, '', 0,)) - - if prop_contents: - self.dumpfile.write(prop_contents) - - # Insert the rev contents, calculating length and checksum as we go. - checksum = md5() - length = 0 - if buf is None: - buf = stream.read(config.PIPE_READ_SIZE) - while buf != '': - checksum.update(buf) - length += len(buf) - self.dumpfile.write(buf) - buf = stream.read(config.PIPE_READ_SIZE) - - stream.close() - - # Go back to overwrite the length and checksum headers with the - # correct values. The content length is the length of property - # data, text data, and any metadata around/inside around them: - self.dumpfile.seek(pos, 0) - self.dumpfile.write( - content_header_fmt - % (length, checksum.hexdigest(), length + len(prop_contents),) - ) - - # Jump back to the end of the stream - self.dumpfile.seek(0, 2) - - # This record is done (write two newlines -- one to terminate - # contents that weren't themselves newline-termination, one to - # provide a blank line for readability. - self.dumpfile.write('\n\n') - - def add_path(self, s_item): - """Emit the addition corresponding to S_ITEM, an SVNCommitItem.""" - - self._add_or_change_path(s_item, OP_ADD) - - def change_path(self, s_item): - """Emit the change corresponding to S_ITEM, an SVNCommitItem.""" - - self._add_or_change_path(s_item, OP_CHANGE) - - def delete_lod(self, lod): - """Emit the deletion of LOD.""" - - self.dumpfile.write( - 'Node-path: %s\n' - 'Node-action: delete\n' - '\n' - % (self._utf8_path(lod.get_path()),) - ) - self._basic_directories.remove(lod.get_path()) - - def delete_path(self, lod, cvs_path): - dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path())) - if basename == '.cvsignore': - # When a .cvsignore file is deleted, the directory's svn:ignore - # property needs to be deleted. - ignore_contents = 'PROPS-END\n' - ignore_len = len(ignore_contents) - - # write headers, then props - self.dumpfile.write( - 'Node-path: %s\n' - 'Node-kind: dir\n' - 'Node-action: change\n' - 'Prop-content-length: %d\n' - 'Content-length: %d\n' - '\n' - '%s' - % (self._utf8_path(dir_path), - ignore_len, ignore_len, ignore_contents) - ) - if not Ctx().keep_cvsignore: - return - - self.dumpfile.write( - 'Node-path: %s\n' - 'Node-action: delete\n' - '\n' - % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),) - ) - - def copy_lod(self, src_lod, dest_lod, src_revnum): - # Register the main LOD directory, and create parent directories - # as needed: - self._register_basic_directory(dest_lod.get_path(), False) - - self.dumpfile.write( - 'Node-path: %s\n' - 'Node-kind: dir\n' - 'Node-action: add\n' - 'Node-copyfrom-rev: %d\n' - 'Node-copyfrom-path: %s\n' - '\n' - % (self._utf8_path(dest_lod.get_path()), - src_revnum, self._utf8_path(src_lod.get_path())) - ) - - def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum): - if isinstance(cvs_path, CVSFile): - node_kind = 'file' - if cvs_path.basename == '.cvsignore': - # FIXME: Here we have to adjust the containing directory's - # svn:ignore property to reflect the addition of the - # .cvsignore file to the LOD! This is awkward because we - # don't have the contents of the .cvsignore file available. - if not Ctx().keep_cvsignore: - return - elif isinstance(cvs_path, CVSDirectory): - node_kind = 'dir' - else: - raise InternalError() - - self.dumpfile.write( - 'Node-path: %s\n' - 'Node-kind: %s\n' - 'Node-action: add\n' - 'Node-copyfrom-rev: %d\n' - 'Node-copyfrom-path: %s\n' - '\n' - % ( - self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)), - node_kind, - src_revnum, - self._utf8_path(src_lod.get_path(cvs_path.cvs_path)) - ) - ) - - def finish(self): - """Perform any cleanup necessary after all revisions have been - committed.""" - - self.dumpfile.close() - - -def generate_ignores(raw_ignore_val): - ignore_vals = [ ] - for ignore in raw_ignore_val.split(): - # Reset the list if we encounter a '!' - # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore - if ignore == '!': - ignore_vals = [ ] - else: - ignore_vals.append(ignore) - return ignore_vals - - -class LF_EOL_Filter: - """Filter a stream and convert all end-of-line markers (CRLF, CR or LF) - into the appropriate canonical eol style.""" - - eol_style_replacements = { - 'LF' : '\n', - 'CR' : '\r', - 'CRLF' : '\r\n', - 'native' : '\n', - } - - def __init__(self, stream, eol_style): - self.stream = stream - self.replacement = self.eol_style_replacements[eol_style] - self.carry_cr = False - self.eof = False - - def read(self, size=-1): - while True: - buf = self.stream.read(size) - self.eof = len(buf) == 0 - if self.carry_cr: - buf = '\r' + buf - self.carry_cr = False - if not self.eof and buf[-1] == '\r': - self.carry_cr = True - buf = buf[:-1] - buf = buf.replace('\r\n', '\n') - buf = buf.replace('\r', '\n') - if self.replacement != '\n': - buf = buf.replace('\n', self.replacement) - if buf or self.eof: - return buf - - def close(self): - self.stream.close() - self.stream = None - - diff --git a/cvs2svn_lib/fill_source.py b/cvs2svn_lib/fill_source.py deleted file mode 100644 index 2bb8e4c..0000000 --- a/cvs2svn_lib/fill_source.py +++ /dev/null @@ -1,192 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes describing the sources of symbol fills.""" - - -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import SVN_INVALID_REVNUM -from cvs2svn_lib.svn_revision_range import SVNRevisionRange -from cvs2svn_lib.svn_revision_range import RevisionScores - - -class FillSource: - """Representation of a fill source. - - A FillSource keeps track of the paths that have to be filled in a - particular symbol fill. - - This class holds a SVNRevisionRange instance for each CVSFile that - has to be filled within the subtree of the repository rooted at - self.cvs_path. The SVNRevisionRange objects are stored in a tree - in which the directory nodes are dictionaries mapping CVSPaths to - subnodes and the leaf nodes are the SVNRevisionRange objects telling - for what source_lod and what range of revisions the leaf could serve - as a source. - - FillSource objects are able to compute the score for arbitrary - source LODs and source revision numbers. - - These objects are used by the symbol filler in SVNOutputOption.""" - - def __init__(self, cvs_path, symbol, node_tree): - """Create a fill source. - - The best LOD and SVN REVNUM to use as the copy source can be - determined by calling compute_best_source(). - - Members: - - cvs_path -- (CVSPath): the CVSPath described by this FillSource. - - _symbol -- (Symbol) the symbol to be filled. - - _node_tree -- (dict) a tree stored as a map { CVSPath : node }, - where subnodes have the same form. Leaves are - SVNRevisionRange instances telling the source_lod and range - of SVN revision numbers from which the CVSPath can be - copied. - - """ - - self.cvs_path = cvs_path - self._symbol = symbol - self._node_tree = node_tree - - def _set_node(self, cvs_file, svn_revision_range): - parent_node = self._get_node(cvs_file.parent_directory, create=True) - if cvs_file in parent_node: - raise InternalError( - '%s appeared twice in sources for %s' % (cvs_file, self._symbol) - ) - parent_node[cvs_file] = svn_revision_range - - def _get_node(self, cvs_path, create=False): - if cvs_path == self.cvs_path: - return self._node_tree - else: - parent_node = self._get_node(cvs_path.parent_directory, create=create) - try: - return parent_node[cvs_path] - except KeyError: - if create: - node = {} - parent_node[cvs_path] = node - return node - else: - raise - - def compute_best_source(self, preferred_source): - """Determine the best source_lod and subversion revision number to copy. - - Return the best source found, as an SVNRevisionRange instance. If - PREFERRED_SOURCE is not None and its opening is among the sources - with the best scores, return it; otherwise, return the oldest such - revision on the first such source_lod (ordered by the natural LOD - sort order). The return value's source_lod is the best LOD to - copy from, and its opening_revnum is the best SVN revision.""" - - # Aggregate openings and closings from our rev tree - svn_revision_ranges = self._get_revision_ranges(self._node_tree) - - # Score the lists - revision_scores = RevisionScores(svn_revision_ranges) - - best_source_lod, best_revnum, best_score = \ - revision_scores.get_best_revnum() - - if ( - preferred_source is not None - and revision_scores.get_score(preferred_source) == best_score - ): - best_source_lod = preferred_source.source_lod - best_revnum = preferred_source.opening_revnum - - if best_revnum == SVN_INVALID_REVNUM: - raise FatalError( - "failed to find a revision to copy from when copying %s" - % self._symbol.name - ) - - return SVNRevisionRange(best_source_lod, best_revnum) - - def _get_revision_ranges(self, node): - """Return a list of all the SVNRevisionRanges at and under NODE. - - Include duplicates. This is a helper method used by - compute_best_source().""" - - if isinstance(node, SVNRevisionRange): - # It is a leaf node. - return [ node ] - else: - # It is an intermediate node. - revision_ranges = [] - for key, subnode in node.items(): - revision_ranges.extend(self._get_revision_ranges(subnode)) - return revision_ranges - - def get_subsources(self): - """Generate (CVSPath, FillSource) for all direct subsources.""" - - if not isinstance(self._node_tree, SVNRevisionRange): - for cvs_path, node in self._node_tree.items(): - fill_source = FillSource(cvs_path, self._symbol, node) - yield (cvs_path, fill_source) - - def get_subsource_map(self): - """Return the map {CVSPath : FillSource} of direct subsources.""" - - src_entries = {} - - for (cvs_path, fill_subsource) in self.get_subsources(): - src_entries[cvs_path] = fill_subsource - - return src_entries - - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return '%s(%s:%s)' % ( - self.__class__.__name__, self._symbol, self.cvs_path, - ) - - def __repr__(self): - """For convenience only. The format is subject to change at any time.""" - - return '%s%r' % (self, self._node_tree,) - - -def get_source_set(symbol, range_map): - """Return a FillSource describing the fill sources for RANGE_MAP. - - SYMBOL is either a Branch or a Tag. RANGE_MAP is a map { CVSSymbol - : SVNRevisionRange } as returned by - SymbolingsReader.get_range_map(). - - Use the SVNRevisionRanges from RANGE_MAP to create a FillSource - instance describing the sources for filling SYMBOL.""" - - root_cvs_directory = symbol.project.get_root_cvs_directory() - fill_source = FillSource(root_cvs_directory, symbol, {}) - - for cvs_symbol, svn_revision_range in range_map.items(): - fill_source._set_node(cvs_symbol.cvs_file, svn_revision_range) - - return fill_source - - diff --git a/cvs2svn_lib/fulltext_revision_recorder.py b/cvs2svn_lib/fulltext_revision_recorder.py deleted file mode 100644 index ad057b7..0000000 --- a/cvs2svn_lib/fulltext_revision_recorder.py +++ /dev/null @@ -1,127 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2007-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""An abstract class that contructs file contents during CollectRevsPass. - -It calls its record_fulltext() method with the full text of every -revision. This method should be overridden to do something with the -fulltext and possibly return a revision_recorder_token.""" - - -from cvs2svn_lib.revision_manager import RevisionRecorder - - -class FulltextRevisionRecorder: - """Similar to a RevisionRecorder, but it requires the fulltext.""" - - def register_artifacts(self, which_pass): - pass - - def start(self): - pass - - def start_file(self, cvs_file_items): - pass - - def record_fulltext(self, cvs_rev, log, fulltext): - """Record the fulltext for CVS_REV. - - CVS_REV has the log message LOG and the fulltext FULLTEXT. This - method should be overridden to do something sensible with them.""" - - raise NotImplementedError() - - def finish_file(self, cvs_file_items): - pass - - def finish(self): - pass - - -class FulltextRevisionRecorderAdapter(RevisionRecorder): - """Reconstruct the fulltext and pass it to a FulltextRevisionRecorder. - - This class implements RevisionRecorder (so it can be passed directly - to CollectRevsPass). But it doesn't actually record anything. - Instead, it reconstructs the fulltext of each revision, and passes - the fulltext to a fulltext_revision_recorder.""" - - def __init__(self, fulltext_revision_recorder): - RevisionRecorder.__init__(self) - self.fulltext_revision_recorder = fulltext_revision_recorder - - def register_artifacts(self, which_pass): - self.fulltext_revision_recorder.register_artifacts(which_pass) - - def start(self): - self.fulltext_revision_recorder.start() - - def start_file(self, cvs_file_items): - self.fulltext_revision_recorder.start_file(cvs_file_items) - - def record_text(self, cvs_rev, log, text): - """This method should be overwridden. - - It should determine the fulltext of CVS_REV, then pass it to - self.fulltext_revision_recorder.record_fulltext() and return the - result.""" - - raise NotImplementedError() - - def finish_file(self, cvs_file_items): - self.fulltext_revision_recorder.finish_file(cvs_file_items) - - def finish(self): - self.fulltext_revision_recorder.finish() - - -class SimpleFulltextRevisionRecorderAdapter(FulltextRevisionRecorderAdapter): - """Reconstruct the fulltext using a RevisionReader. - - To create the fulltext, this class simply uses a RevisionReader (for - example, RCSRevisionReader or CVSRevisionReader). This is not quite - as wasteful as using one of these RevisionReaders in OutputPass, - because the same RCS file will be read over and over (and so - presumably stay in the disk cache). But it is still pretty silly, - considering that we have all the RCS deltas available to us.""" - - def __init__(self, revision_reader, fulltext_revision_recorder): - FulltextRevisionRecorderAdapter.__init__(self, fulltext_revision_recorder) - self.revision_reader = revision_reader - - def register_artifacts(self, which_pass): - FulltextRevisionRecorderAdapter.register_artifacts(self, which_pass) - self.revision_reader.register_artifacts(which_pass) - - def start(self): - FulltextRevisionRecorderAdapter.start(self) - self.revision_reader.start() - - def record_text(self, cvs_rev, log, text): - # FIXME: We have to decide what to do about keyword substitution - # and eol_style here: - fulltext = self.revision_reader.get_content_stream( - cvs_rev, suppress_keyword_substitution=False - ).read() - return self.fulltext_revision_recorder.record_fulltext( - cvs_rev, log, fulltext - ) - - def finish(self): - FulltextRevisionRecorderAdapter.finish(self) - self.revision_reader.finish() - - diff --git a/cvs2svn_lib/git_output_option.py b/cvs2svn_lib/git_output_option.py deleted file mode 100644 index a1e46b9..0000000 --- a/cvs2svn_lib/git_output_option.py +++ /dev/null @@ -1,658 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2007-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Classes for outputting the converted repository to git. - -For information about the format allowed by git-fast-import, see: - - http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html - -""" - -import bisect - -from cvs2svn_lib import config -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.log import Log -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.artifact_manager import artifact_manager -from cvs2svn_lib.openings_closings import SymbolingsReader -from cvs2svn_lib.symbol import Trunk -from cvs2svn_lib.symbol import Branch -from cvs2svn_lib.symbol import Tag -from cvs2svn_lib.cvs_item import CVSRevisionAdd -from cvs2svn_lib.cvs_item import CVSRevisionChange -from cvs2svn_lib.cvs_item import CVSRevisionDelete -from cvs2svn_lib.cvs_item import CVSRevisionNoop -from cvs2svn_lib.cvs_item import CVSSymbol -from cvs2svn_lib.output_option import OutputOption -from cvs2svn_lib.svn_revision_range import RevisionScores -from cvs2svn_lib.repository_mirror import RepositoryMirror -from cvs2svn_lib.key_generator import KeyGenerator - - -# The branch name to use for the "tag fixup branches". The -# git-fast-import documentation suggests using 'TAG_FIXUP' (outside of -# the refs/heads namespace), but this is currently broken. Use a name -# containing '.', which is not allowed in CVS symbols, to avoid -# conflicts (though of course a conflict could still result if the -# user requests symbol transformations). -FIXUP_BRANCH_NAME = 'refs/heads/TAG.FIXUP' - - -class ExpectedDirectoryError(Exception): - """A file was found where a directory was expected.""" - - pass - - -class ExpectedFileError(Exception): - """A directory was found where a file was expected.""" - - pass - - -class GitRevisionWriter(object): - def register_artifacts(self, which_pass): - pass - - def start(self, f, mirror): - self.f = f - self._mirror = mirror - - def _modify_file(self, cvs_item, post_commit): - raise NotImplementedError() - - def _mkdir_p(self, cvs_directory, lod): - """Make sure that CVS_DIRECTORY exists in LOD. - - If not, create it. Return the node for CVS_DIRECTORY.""" - - try: - node = self._mirror.get_current_lod_directory(lod) - except KeyError: - node = self._mirror.add_lod(lod) - - for sub_path in cvs_directory.get_ancestry()[1:]: - try: - node = node[sub_path] - except KeyError: - node = node.mkdir(sub_path) - if node is None: - raise ExpectedDirectoryError( - 'File found at \'%s\' where directory was expected.' % (sub_path,) - ) - - return node - - def add_file(self, cvs_rev, post_commit): - cvs_file = cvs_rev.cvs_file - if post_commit: - lod = cvs_file.project.get_trunk() - else: - lod = cvs_rev.lod - parent_node = self._mkdir_p(cvs_file.parent_directory, lod) - parent_node.add_file(cvs_file) - self._modify_file(cvs_rev, post_commit) - - def modify_file(self, cvs_rev, post_commit): - cvs_file = cvs_rev.cvs_file - if post_commit: - lod = cvs_file.project.get_trunk() - else: - lod = cvs_rev.lod - if self._mirror.get_current_path(cvs_file, lod) is not None: - raise ExpectedFileError( - 'Directory found at \'%s\' where file was expected.' % (cvs_file,) - ) - self._modify_file(cvs_rev, post_commit) - - def delete_file(self, cvs_rev, post_commit): - cvs_file = cvs_rev.cvs_file - if post_commit: - lod = cvs_file.project.get_trunk() - else: - lod = cvs_rev.lod - parent_node = self._mirror.get_current_path( - cvs_file.parent_directory, lod - ) - if parent_node[cvs_file] is not None: - raise ExpectedFileError( - 'Directory found at \'%s\' where file was expected.' % (cvs_file,) - ) - del parent_node[cvs_file] - self.f.write('D %s\n' % (cvs_rev.cvs_file.cvs_path,)) - - def process_revision(self, cvs_rev, post_commit): - if isinstance(cvs_rev, CVSRevisionAdd): - self.add_file(cvs_rev, post_commit) - elif isinstance(cvs_rev, CVSRevisionChange): - self.modify_file(cvs_rev, post_commit) - elif isinstance(cvs_rev, CVSRevisionDelete): - self.delete_file(cvs_rev, post_commit) - elif isinstance(cvs_rev, CVSRevisionNoop): - pass - else: - raise InternalError('Unexpected CVSRevision type: %s' % (cvs_rev,)) - - def branch_file(self, cvs_symbol): - cvs_file = cvs_symbol.cvs_file - parent_node = self._mkdir_p(cvs_file.parent_directory, cvs_symbol.symbol) - parent_node.add_file(cvs_file) - self._modify_file(cvs_symbol, post_commit=False) - - def finish(self): - del self._mirror - del self.f - - -class GitRevisionMarkWriter(GitRevisionWriter): - def _modify_file(self, cvs_item, post_commit): - if cvs_item.cvs_file.executable: - mode = '100755' - else: - mode = '100644' - - self.f.write( - 'M %s :%d %s\n' - % (mode, cvs_item.revision_recorder_token, - cvs_item.cvs_file.cvs_path,) - ) - - -class GitRevisionInlineWriter(GitRevisionWriter): - def __init__(self, revision_reader): - self.revision_reader = revision_reader - - def register_artifacts(self, which_pass): - GitRevisionWriter.register_artifacts(self, which_pass) - self.revision_reader.register_artifacts(which_pass) - - def start(self, f, mirror): - GitRevisionWriter.start(self, f, mirror) - self.revision_reader.start() - - def _modify_file(self, cvs_item, post_commit): - if cvs_item.cvs_file.executable: - mode = '100755' - else: - mode = '100644' - - self.f.write( - 'M %s inline %s\n' - % (mode, cvs_item.cvs_file.cvs_path,) - ) - - if isinstance(cvs_item, CVSSymbol): - cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db) - else: - cvs_rev = cvs_item - - # FIXME: We have to decide what to do about keyword substitution - # and eol_style here: - fulltext = self.revision_reader.get_content_stream( - cvs_rev, suppress_keyword_substitution=False - ).read() - - self.f.write('data %d\n' % (len(fulltext),)) - self.f.write(fulltext) - self.f.write('\n') - - def finish(self): - GitRevisionWriter.finish(self) - self.revision_reader.finish() - - -def get_chunks(iterable, chunk_size): - """Generate lists containing chunks of the output of ITERABLE. - - Each list contains at most CHUNK_SIZE items. If CHUNK_SIZE is None, - yield the whole contents of ITERABLE in one list.""" - - if chunk_size is None: - yield list(iterable) - else: - it = iter(iterable) - while True: - # If this call to it.next() raises StopIteration, then we have - # no more chunks to emit, so simply pass the exception through: - chunk = [it.next()] - - # Now try filling the rest of the chunk: - try: - while len(chunk) < chunk_size: - chunk.append(it.next()) - except StopIteration: - # The iterator was exhausted while filling chunk, but chunk - # contains at least one element. Yield it, then we're done. - yield chunk - break - - # Yield the full chunk then continue with the next chunk: - yield chunk - del chunk - - -class GitOutputOption(OutputOption): - """An OutputOption that outputs to a git-fast-import formatted file. - - Members: - - dump_filename -- (string) the name of the file to which the - git-fast-import commands for defining revisions will be - written. - - author_transforms -- a map {cvsauthor : (fullname, email)} from - CVS author names to git full name and email address. All of - the contents are 8-bit strings encoded as UTF-8. - - """ - - # The first mark number used for git-fast-import commit marks. This - # value needs to be large to avoid conflicts with blob marks. - _first_commit_mark = 1000000000 - - def __init__( - self, dump_filename, revision_writer, - max_merges=None, author_transforms=None, - ): - """Constructor. - - DUMP_FILENAME is the name of the file to which the git-fast-import - commands for defining revisions should be written. (Please note - that depending on the style of revision writer, the actual file - contents might not be written to this file.) - - REVISION_WRITER is a GitRevisionWriter that is used to output - either the content of revisions or a mark that was previously used - to label a blob. - - MAX_MERGES can be set to an integer telling the maximum number of - parents that can be merged into a commit at once (aside from the - natural parent). If it is set to None, then there is no limit. - - AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from - CVS author names to git full name and email address. All of the - contents should either be Unicode strings or 8-bit strings encoded - as UTF-8. - - """ - - self.dump_filename = dump_filename - self.revision_writer = revision_writer - self.max_merges = max_merges - - def to_utf8(s): - if isinstance(s, unicode): - return s.encode('utf8') - else: - return s - - self.author_transforms = {} - if author_transforms is not None: - for (cvsauthor, (name, email,)) in author_transforms.iteritems(): - cvsauthor = to_utf8(cvsauthor) - name = to_utf8(name) - email = to_utf8(email) - self.author_transforms[cvsauthor] = (name, email,) - - self._mirror = RepositoryMirror() - - self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark) - - def register_artifacts(self, which_pass): - # These artifacts are needed for SymbolingsReader: - artifact_manager.register_temp_file_needed( - config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass - ) - artifact_manager.register_temp_file_needed( - config.SYMBOL_OFFSETS_DB, which_pass - ) - self.revision_writer.register_artifacts(which_pass) - self._mirror.register_artifacts(which_pass) - - def check(self): - if Ctx().cross_project_commits: - raise FatalError( - 'Git output is not supported with cross-project commits' - ) - if Ctx().cross_branch_commits: - raise FatalError( - 'Git output is not supported with cross-branch commits' - ) - if Ctx().username is None: - raise FatalError( - 'Git output requires a default commit username' - ) - - def check_symbols(self, symbol_map): - # FIXME: What constraints does git impose on symbols? - pass - - def setup(self, svn_rev_count): - self._symbolings_reader = SymbolingsReader() - self.f = open(self.dump_filename, 'wb') - - # The youngest revnum that has been committed so far: - self._youngest = 0 - - # A map {lod : [(revnum, mark)]} giving each of the revision - # numbers in which there was a commit to lod, and the mark active - # at the end of the revnum. - self._marks = {} - - self._mirror.open() - self.revision_writer.start(self.f, self._mirror) - - def _create_commit_mark(self, lod, revnum): - mark = self._mark_generator.gen_id() - self._set_lod_mark(lod, revnum, mark) - return mark - - def _set_lod_mark(self, lod, revnum, mark): - """Record MARK as the status of LOD for REVNUM. - - If there is already an entry for REVNUM, overwrite it. If not, - append a new entry to the self._marks list for LOD.""" - - assert revnum >= self._youngest - entry = (revnum, mark) - try: - modifications = self._marks[lod] - except KeyError: - # This LOD hasn't appeared before; create a new list and add the - # entry: - self._marks[lod] = [entry] - else: - # A record exists, so it necessarily has at least one element: - if modifications[-1][0] == revnum: - modifications[-1] = entry - else: - modifications.append(entry) - self._youngest = revnum - - def _get_author(self, svn_commit): - """Return the author to be used for SVN_COMMIT. - - Return the author in the form needed by git; that is, 'foo '.""" - - author = svn_commit.get_author() - (name, email,) = self.author_transforms.get(author, (author, author,)) - return '%s <%s>' % (name, email,) - - @staticmethod - def _get_log_msg(svn_commit): - return svn_commit.get_log_msg() - - def process_initial_project_commit(self, svn_commit): - self._mirror.start_commit(svn_commit.revnum) - self._mirror.end_commit() - - def process_primary_commit(self, svn_commit): - author = self._get_author(svn_commit) - log_msg = self._get_log_msg(svn_commit) - - lods = set() - for cvs_rev in svn_commit.get_cvs_items(): - lods.add(cvs_rev.lod) - if len(lods) != 1: - raise InternalError('Commit affects %d LODs' % (len(lods),)) - lod = lods.pop() - - self._mirror.start_commit(svn_commit.revnum) - if isinstance(lod, Trunk): - # FIXME: is this correct?: - self.f.write('commit refs/heads/master\n') - else: - self.f.write('commit refs/heads/%s\n' % (lod.name,)) - self.f.write( - 'mark :%d\n' - % (self._create_commit_mark(lod, svn_commit.revnum),) - ) - self.f.write( - 'committer %s %d +0000\n' % (author, svn_commit.date,) - ) - self.f.write('data %d\n' % (len(log_msg),)) - self.f.write('%s\n' % (log_msg,)) - for cvs_rev in svn_commit.get_cvs_items(): - self.revision_writer.process_revision(cvs_rev, post_commit=False) - - self.f.write('\n') - self._mirror.end_commit() - - def process_post_commit(self, svn_commit): - author = self._get_author(svn_commit) - log_msg = self._get_log_msg(svn_commit) - - source_lods = set() - for cvs_rev in svn_commit.cvs_revs: - source_lods.add(cvs_rev.lod) - if len(source_lods) != 1: - raise InternalError('Commit is from %d LODs' % (len(source_lods),)) - source_lod = source_lods.pop() - - self._mirror.start_commit(svn_commit.revnum) - # FIXME: is this correct?: - self.f.write('commit refs/heads/master\n') - self.f.write( - 'mark :%d\n' - % (self._create_commit_mark(None, svn_commit.revnum),) - ) - self.f.write( - 'committer %s %d +0000\n' % (author, svn_commit.date,) - ) - self.f.write('data %d\n' % (len(log_msg),)) - self.f.write('%s\n' % (log_msg,)) - self.f.write( - 'merge :%d\n' - % (self._get_source_mark(source_lod, svn_commit.revnum),) - ) - for cvs_rev in svn_commit.cvs_revs: - self.revision_writer.process_revision(cvs_rev, post_commit=True) - - self.f.write('\n') - self._mirror.end_commit() - - def _get_source_groups(self, svn_commit): - """Return groups of sources for SVN_COMMIT. - - SVN_COMMIT is an instance of SVNSymbolCommit. Yield tuples - (source_lod, svn_revnum, cvs_symbols) where source_lod is the line - of development and svn_revnum is the revision that should serve as - a source, and cvs_symbols is a list of CVSSymbolItems that can be - copied from that source. The groups are returned in arbitrary - order.""" - - # Get a map {CVSSymbol : SVNRevisionRange}: - range_map = self._symbolings_reader.get_range_map(svn_commit) - - # range_map, split up into one map per LOD; i.e., {LOD : - # {CVSSymbol : SVNRevisionRange}}: - lod_range_maps = {} - - for (cvs_symbol, range) in range_map.iteritems(): - lod_range_map = lod_range_maps.get(range.source_lod) - if lod_range_map is None: - lod_range_map = {} - lod_range_maps[range.source_lod] = lod_range_map - lod_range_map[cvs_symbol] = range - - # Sort the sources so that the branch that serves most often as - # parent is processed first: - lod_ranges = lod_range_maps.items() - lod_ranges.sort( - lambda (lod1,lod_range_map1),(lod2,lod_range_map2): - -cmp(len(lod_range_map1), len(lod_range_map2)) or cmp(lod1, lod2) - ) - - for (lod, lod_range_map) in lod_ranges: - while lod_range_map: - revision_scores = RevisionScores(lod_range_map.values()) - (source_lod, revnum, score) = revision_scores.get_best_revnum() - assert source_lod == lod - cvs_symbols = [] - for (cvs_symbol, range) in lod_range_map.items(): - if revnum in range: - cvs_symbols.append(cvs_symbol) - del lod_range_map[cvs_symbol] - yield (lod, revnum, cvs_symbols) - - def _get_all_files(self, node): - """Generate all of the CVSFiles under NODE.""" - - for cvs_path in node: - subnode = node[cvs_path] - if subnode is None: - yield cvs_path - else: - for sub_cvs_path in self._get_all_files(subnode): - yield sub_cvs_path - - def _is_simple_copy(self, svn_commit, source_groups): - """Return True iff SVN_COMMIT can be created as a simple copy. - - SVN_COMMIT is an SVNTagCommit. Return True iff it can be created - as a simple copy from an existing revision (i.e., if the fixup - branch can be avoided for this tag creation).""" - - # The first requirement is that there be exactly one source: - if len(source_groups) != 1: - return False - - (source_lod, svn_revnum, cvs_symbols) = source_groups[0] - - # The second requirement is that the destination LOD not already - # exist: - try: - self._mirror.get_current_lod_directory(svn_commit.symbol) - except KeyError: - # The LOD doesn't already exist. This is good. - pass - else: - # The LOD already exists. It cannot be created by a copy. - return False - - # The third requirement is that the source LOD contains exactly - # the same files as we need to add to the symbol: - try: - source_node = self._mirror.get_old_lod_directory(source_lod, svn_revnum) - except KeyError: - raise InternalError('Source %r does not exist' % (source_lod,)) - return ( - set([cvs_symbol.cvs_file for cvs_symbol in cvs_symbols]) - == set(self._get_all_files(source_node)) - ) - - def _get_source_mark(self, source_lod, revnum): - """Return the mark active on SOURCE_LOD at the end of REVNUM.""" - - modifications = self._marks[source_lod] - i = bisect.bisect_left(modifications, (revnum + 1,)) - 1 - (revnum, mark) = modifications[i] - return mark - - def _process_symbol_commit( - self, svn_commit, git_branch, source_groups, mark - ): - author = self._get_author(svn_commit) - log_msg = self._get_log_msg(svn_commit) - - self.f.write('commit %s\n' % (git_branch,)) - self.f.write('mark :%d\n' % (mark,)) - self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,)) - self.f.write('data %d\n' % (len(log_msg),)) - self.f.write('%s\n' % (log_msg,)) - - for (source_lod, source_revnum, cvs_symbols,) in source_groups: - self.f.write( - 'merge :%d\n' - % (self._get_source_mark(source_lod, source_revnum),) - ) - - for (source_lod, source_revnum, cvs_symbols,) in source_groups: - for cvs_symbol in cvs_symbols: - self.revision_writer.branch_file(cvs_symbol) - - self.f.write('\n') - - def process_branch_commit(self, svn_commit): - self._mirror.start_commit(svn_commit.revnum) - source_groups = list(self._get_source_groups(svn_commit)) - for groups in get_chunks(source_groups, self.max_merges): - self._process_symbol_commit( - svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,), - groups, - self._create_commit_mark(svn_commit.symbol, svn_commit.revnum), - ) - self._mirror.end_commit() - - def _set_symbol(self, symbol, mark): - if isinstance(symbol, Branch): - category = 'heads' - elif isinstance(symbol, Tag): - category = 'tags' - else: - raise InternalError() - self.f.write('reset refs/%s/%s\n' % (category, symbol.name,)) - self.f.write('from :%d\n' % (mark,)) - - def process_tag_commit(self, svn_commit): - # FIXME: For now we create a fixup branch with the same name as - # the tag, then the tag. We never delete the fixup branch. Also, - # a fixup branch is created even if the tag could be created from - # a single source. - self._mirror.start_commit(svn_commit.revnum) - - source_groups = list(self._get_source_groups(svn_commit)) - if self._is_simple_copy(svn_commit, source_groups): - (source_lod, source_revnum, cvs_symbols) = source_groups[0] - Log().debug( - '%s will be created via a simple copy from %s:r%d' - % (svn_commit.symbol, source_lod, source_revnum,) - ) - mark = self._get_source_mark(source_lod, source_revnum) - self._set_symbol(svn_commit.symbol, mark) - else: - Log().debug( - '%s will be created via a fixup branch' % (svn_commit.symbol,) - ) - - # Create the fixup branch (which might involve making more than - # one commit): - for groups in get_chunks(source_groups, self.max_merges): - mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum) - self._process_symbol_commit( - svn_commit, FIXUP_BRANCH_NAME, groups, mark - ) - - # Store the mark of the last commit to the fixup branch as the - # value of the tag: - self._set_symbol(svn_commit.symbol, mark) - self.f.write('reset %s\n' % (FIXUP_BRANCH_NAME,)) - self.f.write('\n') - - self._mirror.end_commit() - - def cleanup(self): - self.revision_writer.finish() - self._mirror.close() - self.f.close() - del self.f - self._symbolings_reader.close() - del self._symbolings_reader - - diff --git a/cvs2svn_lib/git_revision_recorder.py b/cvs2svn_lib/git_revision_recorder.py deleted file mode 100644 index 604f8ac..0000000 --- a/cvs2svn_lib/git_revision_recorder.py +++ /dev/null @@ -1,114 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2007-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Write file contents to a stream of git-fast-import blobs.""" - -import itertools - -from cvs2svn_lib.symbol import Trunk -from cvs2svn_lib.cvs_item import CVSRevisionDelete -from cvs2svn_lib.cvs_item import CVSSymbol -from cvs2svn_lib.fulltext_revision_recorder import FulltextRevisionRecorder -from cvs2svn_lib.key_generator import KeyGenerator - - -class GitRevisionRecorder(FulltextRevisionRecorder): - """Output file revisions to git-fast-import.""" - - def __init__(self, blob_filename): - self.blob_filename = blob_filename - - def start(self): - self.dump_file = open(self.blob_filename, 'wb') - self._mark_generator = KeyGenerator() - - def start_file(self, cvs_file_items): - self._cvs_file_items = cvs_file_items - - def _get_original_source(self, cvs_rev): - """Return the original source of the contents of CVS_REV. - - Return the first non-delete CVSRevision with the same contents as - CVS_REV. 'First' here refers to deltatext order; i.e., the very - first revision is HEAD on trunk, then backwards to the root of a - branch, then out to the tip of a branch. - - The candidates are all revisions along the CVS delta-dependency - chain until the next one that has a deltatext (inclusive). Of the - candidates, CVSRevisionDeletes are disqualified because, even - though CVS records their contents, it is impossible to extract - their fulltext using commands like 'cvs checkout -p'. - - If there is no other CVSRevision that has the same content, return - CVS_REV itself.""" - - # Keep track of the "best" source CVSRevision found so far: - best_source_rev = None - - for cvs_rev in itertools.chain( - [cvs_rev], self._cvs_file_items.iter_deltatext_ancestors(cvs_rev) - ): - if not isinstance(cvs_rev, CVSRevisionDelete): - best_source_rev = cvs_rev - - if cvs_rev.deltatext_exists: - break - - return best_source_rev - - def record_fulltext(self, cvs_rev, log, fulltext): - """Write the fulltext to a blob if it is original and not a delete. - - The reason we go to this trouble is to avoid writing the same file - contents multiple times for a string of revisions that don't have - deltatexts (as, for example, happens with dead revisions and - imported revisions).""" - - if isinstance(cvs_rev, CVSRevisionDelete): - # There is no need to record a delete revision, and its token - # will never be needed: - return None - - source = self._get_original_source(cvs_rev) - - if source.id == cvs_rev.id: - # Revision is its own source; write it out: - mark = self._mark_generator.gen_id() - self.dump_file.write('blob\n') - self.dump_file.write('mark :%d\n' % (mark,)) - self.dump_file.write('data %d\n' % (len(fulltext),)) - self.dump_file.write(fulltext) - self.dump_file.write('\n') - return mark - else: - # Return as revision_recorder_token the CVSRevision.id of the - # original source revision: - return source.revision_recorder_token - - def finish_file(self, cvs_file_items): - # Determine the original source of each CVSSymbol, and store it as - # the symbol's revision_recorder_token. - for cvs_item in cvs_file_items.values(): - if isinstance(cvs_item, CVSSymbol): - cvs_source = cvs_item.get_cvs_revision_source(cvs_file_items) - cvs_item.revision_recorder_token = cvs_source.revision_recorder_token - - del self._cvs_file_items - - def finish(self): - self.dump_file.close() - - diff --git a/cvs2svn_lib/git_run_options.py b/cvs2svn_lib/git_run_options.py deleted file mode 100644 index 726b127..0000000 --- a/cvs2svn_lib/git_run_options.py +++ /dev/null @@ -1,274 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module manages cvs2git run options.""" - - -import sys -import datetime -import codecs - -from cvs2svn_lib.version import VERSION -from cvs2svn_lib.common import error_prefix -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.log import Log -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.run_options import not_both -from cvs2svn_lib.run_options import RunOptions -from cvs2svn_lib.run_options import ContextOption -from cvs2svn_lib.run_options import IncompatibleOption -from cvs2svn_lib.run_options import authors -from cvs2svn_lib.man_writer import ManWriter -from cvs2svn_lib.project import Project -from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader -from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader -from cvs2svn_lib.git_revision_recorder import GitRevisionRecorder -from cvs2svn_lib.git_output_option import GitRevisionMarkWriter -from cvs2svn_lib.git_output_option import GitOutputOption -from cvs2svn_lib.revision_manager import NullRevisionRecorder -from cvs2svn_lib.revision_manager import NullRevisionExcluder -from cvs2svn_lib.fulltext_revision_recorder \ - import SimpleFulltextRevisionRecorderAdapter - - -short_desc = 'convert a cvs repository into a git repository' - -synopsis = """\ -.B cvs2git -[\\fIOPTION\\fR]... \\fIOUTPUT-OPTIONS CVS-REPOS-PATH\\fR -.br -.B cvs2git -[\\fIOPTION\\fR]... \\fI--options=PATH\\fR -""" - -long_desc = """\ -Create a new git repository based on the version history stored in a -CVS repository. Each CVS commit will be mirrored in the git -repository, including such information as date of commit and id of the -committer. -.P -The output of this program are a "blobfile" and a "dumpfile", which -together can be loaded into a git repository using "git fast-import". -.P -\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS -repository that you want to convert. This path doesn't have to be the -top level directory of a CVS repository; it can point at a project -within a repository, in which case only that project will be -converted. This path or one of its parent directories has to contain -a subdirectory called CVSROOT (though the CVSROOT directory can be -empty). -.P -It is not possible directly to convert a CVS repository to which you -only have remote access, but the FAQ describes tools that may be used -to create a local copy of a remote CVS repository. -""" - -files = """\ -A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by -\\fB--tmpdir\\fR) is used as scratch space for temporary data files. -""" - -see_also = [ - ('cvs', '1'), - ('git', '1'), - ('git-fast-import', '1'), - ] - - -class GitRunOptions(RunOptions): - def __init__(self, progname, cmd_args, pass_manager): - Ctx().cross_project_commits = False - Ctx().cross_branch_commits = False - RunOptions.__init__(self, progname, cmd_args, pass_manager) - - def _get_output_options_group(self): - group = RunOptions._get_output_options_group(self) - - group.add_option(IncompatibleOption( - '--blobfile', type='string', - action='store', - help='path to which the "blob" data should be written', - man_help=( - 'Write the "blob" data (containing revision contents) to ' - '\\fIpath\\fR.' - ), - metavar='PATH', - )) - group.add_option(IncompatibleOption( - '--dumpfile', type='string', - action='store', - help='path to which the revision data should be written', - man_help=( - 'Write the revision data (branches and commits) to \\fIpath\\fR.' - ), - metavar='PATH', - )) - group.add_option(ContextOption( - '--dry-run', - action='store_true', - help=( - 'do not create any output; just print what would happen.' - ), - man_help=( - 'Do not create any output; just print what would happen.' - ), - )) - - return group - - def _get_extraction_options_group(self): - group = RunOptions._get_extraction_options_group(self) - - self.parser.set_default('use_cvs', False) - group.add_option(IncompatibleOption( - '--use-cvs', - action='store_true', - help=( - 'use CVS to extract revision contents (slower than ' - '--use-rcs but more reliable) (default)' - ), - man_help=( - 'Use CVS to extract revision contents. This option is slower ' - 'than \\fB--use-rcs\\fR but more reliable.' - ), - )) - self.parser.set_default('use_rcs', False) - group.add_option(IncompatibleOption( - '--use-rcs', - action='store_true', - help=( - 'use RCS to extract revision contents (faster than ' - '--use-cvs but fails in some cases)' - ), - man_help=( - 'Use RCS \'co\' to extract revision contents. This option is ' - 'faster than \\fB--use-cvs\\fR but fails in some cases.' - ), - )) - - return group - - def callback_manpage(self, option, opt_str, value, parser): - f = codecs.getwriter('utf_8')(sys.stdout) - ManWriter( - parser, - section='1', - date=datetime.date.today(), - source='Version %s' % (VERSION,), - manual='User Commands', - short_desc=short_desc, - synopsis=synopsis, - long_desc=long_desc, - files=files, - authors=authors, - see_also=see_also, - ).write_manpage(f) - sys.exit(0) - - def process_io_options(self): - """Process input/output options. - - Process options related to extracting data from the CVS repository - and writing to 'git fast-import'-formatted files.""" - - ctx = Ctx() - options = self.options - - not_both(options.use_rcs, '--use-rcs', - options.use_cvs, '--use-cvs') - - if options.use_rcs: - revision_reader = RCSRevisionReader( - co_executable=options.co_executable - ) - else: - # --use-cvs is the default: - revision_reader = CVSRevisionReader( - cvs_executable=options.cvs_executable - ) - - if ctx.dry_run: - ctx.revision_recorder = NullRevisionRecorder() - else: - if not (options.blobfile and options.dumpfile): - raise FatalError("must pass '--blobfile' and '--dumpfile' options.") - ctx.revision_recorder = SimpleFulltextRevisionRecorderAdapter( - revision_reader, - GitRevisionRecorder(options.blobfile), - ) - - ctx.revision_excluder = NullRevisionExcluder() - ctx.revision_reader = None - - ctx.output_option = GitOutputOption( - options.dumpfile, - GitRevisionMarkWriter(), - max_merges=None, - # Optional map from CVS author names to git author names: - author_transforms={}, # FIXME - ) - - def set_project( - self, - project_cvs_repos_path, - symbol_transforms=None, - symbol_strategy_rules=[], - ): - """Set the project to be converted. - - If a project had already been set, overwrite it. - - Most arguments are passed straight through to the Project - constructor. SYMBOL_STRATEGY_RULES is an iterable of - SymbolStrategyRules that will be applied to symbols in this - project.""" - - symbol_strategy_rules = list(symbol_strategy_rules) - - project = Project( - 0, - project_cvs_repos_path, - symbol_transforms=symbol_transforms, - ) - - self.projects = [project] - self.project_symbol_strategy_rules = [symbol_strategy_rules] - - def process_options(self): - # Consistency check for options and arguments. - if len(self.args) == 0: - self.usage() - sys.exit(1) - - if len(self.args) > 1: - Log().error(error_prefix + ": must pass only one CVS repository.\n") - self.usage() - sys.exit(1) - - cvsroot = self.args[0] - - self.process_io_options() - self.process_symbol_strategy_options() - self.process_property_setter_options() - - # Create the project: - self.set_project( - cvsroot, - symbol_transforms=self.options.symbol_transforms, - symbol_strategy_rules=self.options.symbol_strategy_rules, - ) - - diff --git a/cvs2svn_lib/key_generator.py b/cvs2svn_lib/key_generator.py deleted file mode 100644 index d580d6b..0000000 --- a/cvs2svn_lib/key_generator.py +++ /dev/null @@ -1,45 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains the KeyGenerator class.""" - - -class KeyGenerator: - """Generate a series of unique keys.""" - - def __init__(self, first_id=1): - """Initialize a KeyGenerator with the specified FIRST_ID. - - FIRST_ID should be an int or long, and the generated keys will be - of the same type.""" - - self._key_base = first_id - self._last_id = None - - def gen_id(self): - """Generate and return a previously-unused key, as an integer.""" - - self._last_id = self._key_base - self._key_base += 1 - - return self._last_id - - def get_last_id(self): - """Return the last id that was generated, as an integer.""" - - return self._last_id - - diff --git a/cvs2svn_lib/log.py b/cvs2svn_lib/log.py deleted file mode 100644 index 798350c..0000000 --- a/cvs2svn_lib/log.py +++ /dev/null @@ -1,174 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains a simple logging facility for cvs2svn.""" - - -import sys -import time -import threading - - -class Log: - """A Simple logging facility. - - If self.log_level is DEBUG or higher, each line will be timestamped - with the number of wall-clock seconds since the time when this - module was first imported. - - If self.use_timestamps is True, each line will be timestamped with a - human-readable clock time. - - The public methods of this class are thread-safe. - - This class is a Borg; see - http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.""" - - # These constants represent the log levels that this class supports. - # The increase_verbosity() and decrease_verbosity() methods rely on - # these constants being consecutive integers: - ERROR = -2 - WARN = -1 - QUIET = 0 - NORMAL = 1 - VERBOSE = 2 - DEBUG = 3 - - start_time = time.time() - - __shared_state = {} - - def __init__(self): - self.__dict__ = self.__shared_state - if self.__dict__: - return - - self.log_level = Log.NORMAL - - # Set this to True if you want to see timestamps on each line output. - self.use_timestamps = False - - # The output file to use for errors: - self._err = sys.stderr - - # The output file to use for lower-priority messages: - self._out = sys.stdout - - # Lock to serialize writes to the log: - self.lock = threading.Lock() - - def increase_verbosity(self): - self.lock.acquire() - try: - self.log_level = min(self.log_level + 1, Log.DEBUG) - finally: - self.lock.release() - - def decrease_verbosity(self): - self.lock.acquire() - try: - self.log_level = max(self.log_level - 1, Log.ERROR) - finally: - self.lock.release() - - def is_on(self, level): - """Return True iff messages at the specified LEVEL are currently on. - - LEVEL should be one of the constants Log.WARN, Log.QUIET, etc.""" - - return self.log_level >= level - - def _timestamp(self): - """Return a timestamp if needed, as a string with a trailing space.""" - - retval = [] - - if self.log_level >= Log.DEBUG: - retval.append('%f: ' % (time.time() - self.start_time,)) - - if self.use_timestamps: - retval.append(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - ')) - - return ''.join(retval) - - def _write(self, out, *args): - """Write a message to OUT. - - If there are multiple ARGS, they will be separated by spaces. If - there are multiple lines, they will be output one by one with the - same timestamp prefix.""" - - timestamp = self._timestamp() - s = ' '.join(map(str, args)) - lines = s.split('\n') - if lines and not lines[-1]: - del lines[-1] - - self.lock.acquire() - try: - for s in lines: - out.write('%s%s\n' % (timestamp, s,)) - # Ensure that log output doesn't get out-of-order with respect to - # stderr output. - out.flush() - finally: - self.lock.release() - - def write(self, *args): - """Write a message to SELF._out. - - This is a public method to use for writing to the output log - unconditionally.""" - - self._write(self._out, *args) - - def error(self, *args): - """Log a message at the ERROR level.""" - - if self.is_on(Log.ERROR): - self._write(self._err, *args) - - def warn(self, *args): - """Log a message at the WARN level.""" - - if self.is_on(Log.WARN): - self._write(self._out, *args) - - def quiet(self, *args): - """Log a message at the QUIET level.""" - - if self.is_on(Log.QUIET): - self._write(self._out, *args) - - def normal(self, *args): - """Log a message at the NORMAL level.""" - - if self.is_on(Log.NORMAL): - self._write(self._out, *args) - - def verbose(self, *args): - """Log a message at the VERBOSE level.""" - - if self.is_on(Log.VERBOSE): - self._write(self._out, *args) - - def debug(self, *args): - """Log a message at the DEBUG level.""" - - if self.is_on(Log.DEBUG): - self._write(self._out, *args) - - diff --git a/cvs2svn_lib/main.py b/cvs2svn_lib/main.py deleted file mode 100644 index 492c49e..0000000 --- a/cvs2svn_lib/main.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python2 -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -import os -import errno -import gc - -try: - # Try to get access to a bunch of encodings for use with --encoding. - # See http://cjkpython.i18n.org/ for details. - import iconv_codec -except ImportError: - pass - -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.svn_run_options import SVNRunOptions -from cvs2svn_lib.git_run_options import GitRunOptions -from cvs2svn_lib.bzr_run_options import BzrRunOptions -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.pass_manager import PassManager -from cvs2svn_lib.passes import passes - - -def main(progname, run_options, pass_manager): - # Disable garbage collection, as we try not to create any circular - # data structures: - gc.disable() - - # Convenience var, so we don't have to keep instantiating this Borg. - ctx = Ctx() - - # Make sure the tmp directory exists. Note that we don't check if - # it's empty -- we want to be able to use, for example, "." to hold - # tempfiles. But if we *did* want check if it were empty, we'd do - # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-). - if not os.path.exists(ctx.tmpdir): - erase_tmpdir = True - os.mkdir(ctx.tmpdir) - elif not os.path.isdir(ctx.tmpdir): - raise FatalError( - "cvs2svn tried to use '%s' for temporary files, but that path\n" - " exists and is not a directory. Please make it be a directory,\n" - " or specify some other directory for temporary files." - % (ctx.tmpdir,)) - else: - erase_tmpdir = False - - # But do lock the tmpdir, to avoid process clash. - try: - os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock')) - except OSError, e: - if e.errno == errno.EACCES: - raise FatalError("Permission denied:" - + " No write access to directory '%s'." % ctx.tmpdir) - if e.errno == errno.EEXIST: - raise FatalError( - "cvs2svn is using directory '%s' for temporary files, but\n" - " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n" - " cvs2svn process is currently using '%s' as its temporary\n" - " workspace. If you are certain that is not the case,\n" - " then remove the '%s/cvs2svn.lock' subdirectory." - % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,)) - raise - - try: - if run_options.profiling: - import hotshot - prof = hotshot.Profile('cvs2svn.hotshot') - prof.runcall(pass_manager.run, run_options) - prof.close() - else: - pass_manager.run(run_options) - finally: - try: - os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock')) - except: - pass - - if erase_tmpdir: - try: - os.rmdir(ctx.tmpdir) - except: - pass - - -def svn_main(progname, cmd_args): - pass_manager = PassManager(passes) - run_options = SVNRunOptions(progname, cmd_args, pass_manager) - main(progname, run_options, pass_manager) - - -def git_main(progname, cmd_args): - pass_manager = PassManager(passes) - run_options = GitRunOptions(progname, cmd_args, pass_manager) - main(progname, run_options, pass_manager) - - -def bzr_main(progname, cmd_args): - pass_manager = PassManager(passes) - run_options = BzrRunOptions(progname, cmd_args, pass_manager) - main(progname, run_options, pass_manager) - - diff --git a/cvs2svn_lib/man_writer.py b/cvs2svn_lib/man_writer.py deleted file mode 100644 index 3cca8c9..0000000 --- a/cvs2svn_lib/man_writer.py +++ /dev/null @@ -1,197 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains the ManWriter class for outputting manpages.""" - - -import datetime -import optparse -import re - - -whitespace_re = re.compile(r'\s+') - -def wrap(s, width=70): - # Convert all whitespace substrings to single spaces: - s = whitespace_re.sub(' ', s) - s = s.strip() - retval = [] - while s: - if len(s) <= width: - retval.append(s) - break - i = s.rfind(' ', 0, width + 1) - if i == -1: - # There were no spaces within the first width+1 characters; break - # at the next space after width: - i = s.find(' ', width + 1) - if i == -1: - # There were no spaces in s at all. - retval.append(s) - break - - retval.append(s[:i].rstrip()) - s = s[i+1:].lstrip() - - for (i,line) in enumerate(retval): - if line.startswith('\'') or line.startswith('.'): - # These are roff control characters and have to be escaped: - retval[i] = '\\' + line - - return '\n'.join(retval) - - -class ManOption(optparse.Option): - """An optparse.Option that holds an explicit string for the man page.""" - - def __init__(self, *args, **kw): - self.man_help = kw.pop('man_help') - optparse.Option.__init__(self, *args, **kw) - - -class ManWriter(object): - def __init__( - self, - parser, - section, date, source, manual, - short_desc, synopsis, long_desc, files, authors, see_also, - ): - self.parser = parser - self.section = section - self.date = date - self.source = source - self.manual = manual - self.short_desc = short_desc - self.synopsis = synopsis - self.long_desc = long_desc - self.files = files - self.authors = authors - self.see_also = see_also - - def write_title(self, f): - f.write('.\\" Process this file with\n') - f.write( - '.\\" groff -man -Tascii %s.%s\n' % ( - self.parser.get_prog_name(), - self.section, - ) - ) - f.write( - '.TH %s "%s" "%s" "%s" "%s"\n' % ( - self.parser.get_prog_name().upper(), - self.section, - self.date.strftime('%b %d, %Y'), - self.source, - self.manual, - ) - ) - - def write_name(self, f): - f.write('.SH "NAME"\n') - f.write( - '%s \- %s\n' % ( - self.parser.get_prog_name(), - self.short_desc, - ) - ) - - def write_synopsis(self, f): - f.write('.SH "SYNOPSIS"\n') - f.write(self.synopsis) - - def write_description(self, f): - f.write('.SH "DESCRIPTION"\n') - f.write(self.long_desc) - - def _get_option_strings(self, option): - """Return a list of option strings formatted with their metavariables. - - This method is very similar to - optparse.HelpFormatter.format_option_strings(). - - """ - - if option.takes_value(): - metavar = (option.metavar or option.dest).lower() - short_opts = [ - '\\fB%s\\fR \\fI%s\\fR' % (opt, metavar) - for opt in option._short_opts - ] - long_opts = [ - '\\fB%s\\fR=\\fI%s\\fR' % (opt, metavar) - for opt in option._long_opts - ] - else: - short_opts = [ - '\\fB%s\\fR' % (opt,) - for opt in option._short_opts - ] - long_opts = [ - '\\fB%s\\fR' % (opt,) - for opt in option._long_opts - ] - - return short_opts + long_opts - - def _write_option(self, f, option): - man_help = getattr(option, 'man_help', option.help) - - if man_help is not optparse.SUPPRESS_HELP: - man_help = wrap(man_help) - f.write('.IP "%s"\n' % (', '.join(self._get_option_strings(option)),)) - f.write('%s\n' % (man_help,)) - - def _write_container_help(self, f, container): - for option in container.option_list: - if option.help is not optparse.SUPPRESS_HELP: - self._write_option(f, option) - - def write_options(self, f): - f.write('.SH "OPTIONS"\n') - if self.parser.option_list: - (self._write_container_help(f, self.parser)) - for group in self.parser.option_groups: - f.write('.SH "%s"\n' % (group.title.upper(),)) - if group.description: - f.write(self.format_description(group.description) + '\n') - self._write_container_help(f, group) - - def write_files(self, f): - f.write('.SH "FILES"\n') - f.write(self.files) - - def write_authors(self, f): - f.write('.SH "AUTHORS"\n') - f.write(self.authors) - - def write_see_also(self, f): - f.write('.SH "SEE ALSO"\n') - f.write(', '.join([ - '%s(%s)' % (name, section,) - for (name, section,) in self.see_also - ]) + '\n') - - def write_manpage(self, f): - self.write_title(f) - self.write_name(f) - self.write_synopsis(f) - self.write_description(f) - self.write_options(f) - self.write_files(f) - self.write_authors(f) - self.write_see_also(f) - - diff --git a/cvs2svn_lib/metadata.py b/cvs2svn_lib/metadata.py deleted file mode 100644 index 6cd1337..0000000 --- a/cvs2svn_lib/metadata.py +++ /dev/null @@ -1,26 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Represent CVSRevision metadata.""" - - -class Metadata(object): - def __init__(self, id, author, log_msg): - self.id = id - self.author = author - self.log_msg = log_msg - - diff --git a/cvs2svn_lib/metadata_database.py b/cvs2svn_lib/metadata_database.py deleted file mode 100644 index de01920..0000000 --- a/cvs2svn_lib/metadata_database.py +++ /dev/null @@ -1,102 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes to manage CVSRevision metadata.""" - - -try: - from hashlib import sha1 -except ImportError: - from sha import new as sha1 - -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.database import IndexedDatabase -from cvs2svn_lib.key_generator import KeyGenerator -from cvs2svn_lib.serializer import PrimedPickleSerializer -from cvs2svn_lib.metadata import Metadata - - -def MetadataDatabase(store_filename, index_table_filename, mode): - """A database to store Metadata instances that describe CVSRevisions. - - This database manages a map - - id -> Metadata instance - - where id is a unique identifier for the metadata.""" - - return IndexedDatabase( - store_filename, index_table_filename, - mode, PrimedPickleSerializer((Metadata,)), - ) - - -class MetadataLogger: - """Store and generate IDs for the metadata associated with CVSRevisions. - - We want CVSRevisions that might be able to be combined to have the - same metadata ID, so we want a one-to-one relationship id <-> - metadata. We could simply construct a map {metadata : id}, but the - map would grow too large. Therefore, we generate a digest - containing the significant parts of the metadata, and construct a - map {digest : id}. - - To get the ID for a new set of metadata, we first create the digest. - If there is already an ID registered for that digest, we simply - return it. If not, we generate a new ID, store the metadata in the - metadata database under that ID, record the mapping {digest : id}, - and return the new id. - - What metadata is included in the digest? The author, log_msg, - project_id (if Ctx().cross_project_commits is not set), and - branch_name (if Ctx().cross_branch_commits is not set).""" - - def __init__(self, metadata_db): - self._metadata_db = metadata_db - - # A map { digest : id }: - self._digest_to_id = {} - - # A key_generator to generate keys for metadata that haven't been - # seen yet: - self.key_generator = KeyGenerator() - - def store(self, project, branch_name, author, log_msg): - """Store the metadata and return its id. - - Locate the record for a commit with the specified (PROJECT, - BRANCH_NAME, AUTHOR, LOG_MSG) and return its id. (Depending on - policy, not all of these items are necessarily used when creating - the unique id.) If there is no such record, create one and return - its newly-generated id.""" - - key = [author, log_msg] - if not Ctx().cross_project_commits: - key.append('%x' % project.id) - if not Ctx().cross_branch_commits: - key.append(branch_name or '') - - digest = sha1('\0'.join(key)).digest() - try: - # See if it is already known: - return self._digest_to_id[digest] - except KeyError: - id = self.key_generator.gen_id() - self._digest_to_id[digest] = id - self._metadata_db[id] = Metadata(id, author, log_msg) - return id - - diff --git a/cvs2svn_lib/openings_closings.py b/cvs2svn_lib/openings_closings.py deleted file mode 100644 index b1d4093..0000000 --- a/cvs2svn_lib/openings_closings.py +++ /dev/null @@ -1,236 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes to keep track of symbol openings/closings.""" - - -import cPickle - -from cvs2svn_lib import config -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.artifact_manager import artifact_manager -from cvs2svn_lib.svn_revision_range import SVNRevisionRange - - -# Constants used in SYMBOL_OPENINGS_CLOSINGS -OPENING = 'O' -CLOSING = 'C' - - -class SymbolingsLogger: - """Manage the file that contains lines for symbol openings and closings. - - This data will later be used to determine valid SVNRevision ranges - from which a file can be copied when creating a branch or tag in - Subversion. Do this by finding 'Openings' and 'Closings' for each - file copied onto a branch or tag. - - An 'Opening' is the beginning of the lifetime of the source - (CVSRevision or CVSBranch) from which a given CVSSymbol sprouts. - - The 'Closing' is the SVN revision when the source is deleted or - overwritten. - - For example, on file 'foo.c', branch BEE has branch number 1.2.2 and - obviously sprouts from revision 1.2. Therefore, the SVN revision - when 1.2 is committed is the opening for BEE on path 'foo.c', and - the SVN revision when 1.3 is committed is the closing for BEE on - path 'foo.c'. Note that there may be many revisions chronologically - between 1.2 and 1.3, for example, revisions on branches of 'foo.c', - perhaps even including on branch BEE itself. But 1.3 is the next - revision *on the same line* as 1.2, that is why it is the closing - revision for those symbolic names of which 1.2 is the opening. - - The reason for doing all this hullabaloo is (1) to determine what - range of SVN revision numbers can be used as the source of a copy of - a particular file onto a branch/tag, and (2) to minimize the number - of copies and deletes per creation by choosing source SVN revision - numbers that can be used for as many files as possible. - - For example, revisions 1.2 and 1.3 of foo.c might correspond to - revisions 17 and 30 in Subversion. That means that when creating - branch BEE, foo.c has to be copied from a Subversion revision number - in the range 17 <= revnum < 30. Now if there were another file, - 'bar.c', in the same directory, and 'bar.c's opening and closing for - BEE correspond to revisions 24 and 39 in Subversion, then we can - kill two birds with one stone by copying the whole directory from - somewhere in the range 24 <= revnum < 30.""" - - def __init__(self): - self.symbolings = open( - artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS), 'w') - - def log_revision(self, cvs_rev, svn_revnum): - """Log any openings and closings found in CVS_REV.""" - - for (symbol_id, cvs_symbol_id,) in cvs_rev.opened_symbols: - self._log_opening(symbol_id, cvs_symbol_id, svn_revnum) - - for (symbol_id, cvs_symbol_id) in cvs_rev.closed_symbols: - self._log_closing(symbol_id, cvs_symbol_id, svn_revnum) - - def log_branch_revision(self, cvs_branch, svn_revnum): - """Log any openings and closings found in CVS_BRANCH.""" - - for (symbol_id, cvs_symbol_id,) in cvs_branch.opened_symbols: - self._log_opening(symbol_id, cvs_symbol_id, svn_revnum) - - def _log(self, symbol_id, cvs_symbol_id, svn_revnum, type): - """Log an opening or closing to self.symbolings. - - Write out a single line to the symbol_openings_closings file - representing that SVN_REVNUM is either the opening or closing - (TYPE) of CVS_SYMBOL_ID for SYMBOL_ID. - - TYPE should be one of the following constants: OPENING or CLOSING.""" - - self.symbolings.write( - '%x %d %s %x\n' % (symbol_id, svn_revnum, type, cvs_symbol_id) - ) - - def _log_opening(self, symbol_id, cvs_symbol_id, svn_revnum): - """Log an opening to self.symbolings. - - See _log() for more information.""" - - self._log(symbol_id, cvs_symbol_id, svn_revnum, OPENING) - - def _log_closing(self, symbol_id, cvs_symbol_id, svn_revnum): - """Log a closing to self.symbolings. - - See _log() for more information.""" - - self._log(symbol_id, cvs_symbol_id, svn_revnum, CLOSING) - - def close(self): - self.symbolings.close() - self.symbolings = None - - -class SymbolingsReader: - """Provides an interface to retrieve symbol openings and closings. - - This class accesses the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the - SYMBOL_OFFSETS_DB. Does the heavy lifting of finding and returning - the correct opening and closing Subversion revision numbers for a - given symbolic name and SVN revision number range.""" - - def __init__(self): - """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and - reads the offsets database into memory.""" - - self.symbolings = open( - artifact_manager.get_temp_file( - config.SYMBOL_OPENINGS_CLOSINGS_SORTED), - 'r') - # The offsets_db is really small, and we need to read and write - # from it a fair bit, so suck it into memory - offsets_db = file( - artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'rb') - # A map from symbol_id to offset. The values of this map are - # incremented as the openings and closings for a symbol are - # consumed. - self.offsets = cPickle.load(offsets_db) - offsets_db.close() - - def close(self): - self.symbolings.close() - del self.symbolings - del self.offsets - - def _generate_lines(self, symbol): - """Generate the lines for SYMBOL. - - SYMBOL is a TypedSymbol instance. Yield the tuple (revnum, type, - cvs_symbol_id) for all openings and closings for SYMBOL.""" - - if symbol.id in self.offsets: - # Set our read offset for self.symbolings to the offset for this - # symbol: - self.symbolings.seek(self.offsets[symbol.id]) - - while True: - line = self.symbolings.readline().rstrip() - if not line: - break - (id, revnum, type, cvs_symbol_id) = line.split() - id = int(id, 16) - revnum = int(revnum) - if id != symbol.id: - break - cvs_symbol_id = int(cvs_symbol_id, 16) - - yield (revnum, type, cvs_symbol_id) - - def get_range_map(self, svn_symbol_commit): - """Return the ranges of all CVSSymbols in SVN_SYMBOL_COMMIT. - - Return a map { CVSSymbol : SVNRevisionRange }.""" - - # A map { cvs_symbol_id : CVSSymbol }: - cvs_symbol_map = {} - for cvs_symbol in svn_symbol_commit.get_cvs_items(): - cvs_symbol_map[cvs_symbol.id] = cvs_symbol - - range_map = {} - - for (revnum, type, cvs_symbol_id) \ - in self._generate_lines(svn_symbol_commit.symbol): - cvs_symbol = cvs_symbol_map.get(cvs_symbol_id) - if cvs_symbol is None: - # This CVSSymbol is not part of SVN_SYMBOL_COMMIT. - continue - range = range_map.get(cvs_symbol) - if type == OPENING: - if range is not None: - raise InternalError( - 'Multiple openings logged for %r' % (cvs_symbol,) - ) - range_map[cvs_symbol] = SVNRevisionRange( - cvs_symbol.source_lod, revnum - ) - else: - if range is None: - raise InternalError( - 'Closing precedes opening for %r' % (cvs_symbol,) - ) - if range.closing_revnum is not None: - raise InternalError( - 'Multiple closings logged for %r' % (cvs_symbol,) - ) - range.add_closing(revnum) - - # Make sure that all CVSSymbols are accounted for, and adjust the - # closings to be not later than svn_symbol_commit.revnum. - for cvs_symbol in cvs_symbol_map.itervalues(): - try: - range = range_map[cvs_symbol] - except KeyError: - raise InternalError('No opening for %s' % (cvs_symbol,)) - - if range.opening_revnum >= svn_symbol_commit.revnum: - raise InternalError( - 'Opening in r%d not ready for %s in r%d' - % (range.opening_revnum, cvs_symbol, svn_symbol_commit.revnum,) - ) - - if range.closing_revnum is not None \ - and range.closing_revnum > svn_symbol_commit.revnum: - range.closing_revnum = None - - return range_map - - diff --git a/cvs2svn_lib/output_option.py b/cvs2svn_lib/output_option.py deleted file mode 100644 index 70419e6..0000000 --- a/cvs2svn_lib/output_option.py +++ /dev/null @@ -1,85 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes that hold the cvs2svn output options.""" - - -class OutputOption: - """Represents an output choice for a run of cvs2svn.""" - - def register_artifacts(self, which_pass): - """Register artifacts that will be needed for this output option. - - WHICH_PASS is the pass that will call our callbacks, so it should - be used to do the registering (e.g., call - WHICH_PASS.register_temp_file() and/or - WHICH_PASS.register_temp_file_needed()).""" - - pass - - def check(self): - """Check that the options stored in SELF are sensible. - - This might including the existence of a repository on disk, etc.""" - - raise NotImplementedError() - - def check_symbols(self, symbol_map): - """Check that the symbols in SYMBOL_MAP are OK for this output option. - - SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}, - indicating how each symbol is planned to be converted. Raise a - FatalError if the symbol plan is not acceptable for this output - option.""" - - raise NotImplementedError() - - def setup(self, svn_rev_count): - """Prepare this output option.""" - - raise NotImplementedError() - - def process_initial_project_commit(self, svn_commit): - """Process SVN_COMMIT, which is an SVNInitialProjectCommit.""" - - raise NotImplementedError() - - def process_primary_commit(self, svn_commit): - """Process SVN_COMMIT, which is an SVNPrimaryCommit.""" - - raise NotImplementedError() - - def process_post_commit(self, svn_commit): - """Process SVN_COMMIT, which is an SVNPostCommit.""" - - raise NotImplementedError() - - def process_branch_commit(self, svn_commit): - """Process SVN_COMMIT, which is an SVNBranchCommit.""" - - raise NotImplementedError() - - def process_tag_commit(self, svn_commit): - """Process SVN_COMMIT, which is an SVNTagCommit.""" - - raise NotImplementedError() - - def cleanup(self): - """Perform any required cleanup related to this output option.""" - - raise NotImplementedError() - - diff --git a/cvs2svn_lib/pass_manager.py b/cvs2svn_lib/pass_manager.py deleted file mode 100644 index 90fa2dc..0000000 --- a/cvs2svn_lib/pass_manager.py +++ /dev/null @@ -1,215 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains tools to manage the passes of a conversion.""" - - -import time -import gc - -from cvs2svn_lib import config -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.log import Log -from cvs2svn_lib.stats_keeper import StatsKeeper -from cvs2svn_lib.stats_keeper import read_stats_keeper -from cvs2svn_lib.artifact_manager import artifact_manager - - -class InvalidPassError(FatalError): - def __init__(self, msg): - FatalError.__init__( - self, msg + '\nUse --help-passes for more information.') - - -def check_for_garbage(): - # We've turned off the garbage collector because we shouldn't - # need it (we don't create circular dependencies) and because it - # is therefore a waste of time. So here we check for any - # unreachable objects and generate a debug-level warning if any - # occur: - gc.set_debug(gc.DEBUG_SAVEALL) - gc_count = gc.collect() - if gc_count: - if Log().is_on(Log.DEBUG): - Log().debug( - 'INTERNAL: %d unreachable object(s) were garbage collected:' - % (gc_count,) - ) - for g in gc.garbage: - Log().debug(' %s' % (g,)) - del gc.garbage[:] - - -class Pass(object): - """Base class for one step of the conversion.""" - - def __init__(self): - # By default, use the pass object's class name as the pass name: - self.name = self.__class__.__name__ - - def register_artifacts(self): - """Register artifacts (created and needed) in artifact_manager.""" - - raise NotImplementedError - - def _register_temp_file(self, basename): - """Helper method; for brevity only.""" - - artifact_manager.register_temp_file(basename, self) - - def _register_temp_file_needed(self, basename): - """Helper method; for brevity only.""" - - artifact_manager.register_temp_file_needed(basename, self) - - def run(self, run_options, stats_keeper): - """Carry out this step of the conversion. - - RUN_OPTIONS is an instance of RunOptions. STATS_KEEPER is an - instance of StatsKeeper.""" - - raise NotImplementedError - - -class PassManager: - """Manage a list of passes that can be executed separately or all at once. - - Passes are numbered starting with 1.""" - - def __init__(self, passes): - """Construct a PassManager with the specified PASSES. - - Internally, passes are numbered starting with 1. So PASSES[0] is - considered to be pass number 1.""" - - self.passes = passes - self.num_passes = len(self.passes) - - def get_pass_number(self, pass_name, default=None): - """Return the number of the pass indicated by PASS_NAME. - - PASS_NAME should be a string containing the name or number of a - pass. If a number, it should be in the range 1 <= value <= - self.num_passes. Return an integer in the same range. If - PASS_NAME is the empty string and DEFAULT is specified, return - DEFAULT. Raise InvalidPassError if PASS_NAME cannot be converted - into a valid pass number.""" - - if not pass_name and default is not None: - assert 1 <= default <= self.num_passes - return default - - try: - # Does pass_name look like an integer? - pass_number = int(pass_name) - if not 1 <= pass_number <= self.num_passes: - raise InvalidPassError( - 'illegal value (%d) for pass number. Must be 1 through %d or\n' - 'the name of a known pass.' - % (pass_number,self.num_passes,)) - return pass_number - except ValueError: - # Is pass_name the name of one of the passes? - for (i, the_pass) in enumerate(self.passes): - if the_pass.name == pass_name: - return i + 1 - raise InvalidPassError('Unknown pass name (%r).' % (pass_name,)) - - def run(self, run_options): - """Run the specified passes, one after another. - - RUN_OPTIONS will be passed to the Passes' run() methods. - RUN_OPTIONS.start_pass is the number of the first pass that should - be run. RUN_OPTIONS.end_pass is the number of the last pass that - should be run. It must be that 1 <= RUN_OPTIONS.start_pass <= - RUN_OPTIONS.end_pass <= self.num_passes.""" - - # Convert start_pass and end_pass into the indices of the passes - # to execute, using the Python index range convention (i.e., first - # pass executed and first pass *after* the ones that should be - # executed). - index_start = run_options.start_pass - 1 - index_end = run_options.end_pass - - # Inform the artifact manager when artifacts are created and used: - for (i, the_pass) in enumerate(self.passes): - the_pass.register_artifacts() - # Each pass creates a new version of the statistics file: - artifact_manager.register_temp_file( - config.STATISTICS_FILE % (i + 1,), the_pass - ) - if i != 0: - # Each pass subsequent to the first reads the statistics file - # from the preceding pass: - artifact_manager.register_temp_file_needed( - config.STATISTICS_FILE % (i + 1 - 1,), the_pass - ) - - # Tell the artifact manager about passes that are being skipped this run: - for the_pass in self.passes[0:index_start]: - artifact_manager.pass_skipped(the_pass) - - start_time = time.time() - for i in range(index_start, index_end): - the_pass = self.passes[i] - Log().quiet('----- pass %d (%s) -----' % (i + 1, the_pass.name,)) - artifact_manager.pass_started(the_pass) - - if i == 0: - stats_keeper = StatsKeeper() - else: - stats_keeper = read_stats_keeper( - artifact_manager.get_temp_file( - config.STATISTICS_FILE % (i + 1 - 1,) - ) - ) - - the_pass.run(run_options, stats_keeper) - end_time = time.time() - stats_keeper.log_duration_for_pass( - end_time - start_time, i + 1, the_pass.name - ) - Log().normal(stats_keeper.single_pass_timing(i + 1)) - stats_keeper.archive( - artifact_manager.get_temp_file(config.STATISTICS_FILE % (i + 1,)) - ) - start_time = end_time - Ctx().clean() - # Allow the artifact manager to clean up artifacts that are no - # longer needed: - artifact_manager.pass_done(the_pass, Ctx().skip_cleanup) - - check_for_garbage() - - # Tell the artifact manager about passes that are being deferred: - for the_pass in self.passes[index_end:]: - artifact_manager.pass_deferred(the_pass) - - Log().quiet(stats_keeper) - Log().normal(stats_keeper.timings()) - - # Consistency check: - artifact_manager.check_clean() - - def help_passes(self): - """Output (to sys.stdout) the indices and names of available passes.""" - - print 'PASSES:' - for (i, the_pass) in enumerate(self.passes): - print '%5d : %s' % (i + 1, the_pass.name,) - - diff --git a/cvs2svn_lib/passes.py b/cvs2svn_lib/passes.py deleted file mode 100644 index af14692..0000000 --- a/cvs2svn_lib/passes.py +++ /dev/null @@ -1,1837 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module defines the passes that make up a conversion.""" - - -import sys -import os -import shutil -import cPickle - -from cvs2svn_lib import config -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.common import warning_prefix -from cvs2svn_lib.common import FatalException -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.common import DB_OPEN_NEW -from cvs2svn_lib.common import DB_OPEN_READ -from cvs2svn_lib.common import DB_OPEN_WRITE -from cvs2svn_lib.common import Timestamper -from cvs2svn_lib.log import Log -from cvs2svn_lib.pass_manager import Pass -from cvs2svn_lib.serializer import PrimedPickleSerializer -from cvs2svn_lib.artifact_manager import artifact_manager -from cvs2svn_lib.cvs_file_database import CVSFileDatabase -from cvs2svn_lib.metadata_database import MetadataDatabase -from cvs2svn_lib.project import read_projects -from cvs2svn_lib.project import write_projects -from cvs2svn_lib.symbol import LineOfDevelopment -from cvs2svn_lib.symbol import Trunk -from cvs2svn_lib.symbol import Symbol -from cvs2svn_lib.symbol import Branch -from cvs2svn_lib.symbol import Tag -from cvs2svn_lib.symbol import ExcludedSymbol -from cvs2svn_lib.symbol_database import SymbolDatabase -from cvs2svn_lib.symbol_database import create_symbol_database -from cvs2svn_lib.symbol_statistics import SymbolPlanError -from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException -from cvs2svn_lib.symbol_statistics import SymbolStatistics -from cvs2svn_lib.cvs_item import CVSRevision -from cvs2svn_lib.cvs_item import CVSSymbol -from cvs2svn_lib.cvs_item_database import OldCVSItemStore -from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore -from cvs2svn_lib.cvs_item_database import cvs_item_primer -from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase -from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase -from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase -from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase -from cvs2svn_lib.key_generator import KeyGenerator -from cvs2svn_lib.changeset import RevisionChangeset -from cvs2svn_lib.changeset import OrderedChangeset -from cvs2svn_lib.changeset import SymbolChangeset -from cvs2svn_lib.changeset import BranchChangeset -from cvs2svn_lib.changeset import create_symbol_changeset -from cvs2svn_lib.changeset_graph import ChangesetGraph -from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink -from cvs2svn_lib.changeset_database import ChangesetDatabase -from cvs2svn_lib.changeset_database import CVSItemToChangesetTable -from cvs2svn_lib.svn_commit import SVNRevisionCommit -from cvs2svn_lib.openings_closings import SymbolingsLogger -from cvs2svn_lib.svn_commit_creator import SVNCommitCreator -from cvs2svn_lib.persistence_manager import PersistenceManager -from cvs2svn_lib.collect_data import CollectData -from cvs2svn_lib.process import call_command -from cvs2svn_lib.check_dependencies_pass \ - import CheckItemStoreDependenciesPass -from cvs2svn_lib.check_dependencies_pass \ - import CheckIndexedItemStoreDependenciesPass - - -def sort_file(infilename, outfilename, options=[]): - """Sort file INFILENAME, storing the results to OUTFILENAME. - - OPTIONS is an optional list of strings that are passed as additional - options to the sort command.""" - - # GNU sort will sort our dates differently (incorrectly!) if our - # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set - # it to 'C' - lc_all_tmp = os.environ.get('LC_ALL', None) - os.environ['LC_ALL'] = 'C' - - # The -T option to sort has a nice side effect. The Win32 sort is - # case insensitive and cannot be used, and since it does not - # understand the -T option and dies if we try to use it, there is no - # risk that we use that sort by accident. - command = [ - Ctx().sort_executable, - '-T', Ctx().tmpdir - ] + options + [ - infilename - ] - - try: - # Under Windows, the subprocess module uses the Win32 - # CreateProcess, which always looks in the Windows system32 - # directory before it looks in the directories listed in the PATH - # environment variable. Since the Windows sort.exe is in the - # system32 directory it will always be chosen. A simple - # workaround is to launch the sort in a shell. When the shell - # (cmd.exe) searches it only examines the directories in the PATH - # so putting the directory with GNU sort ahead of the Windows - # system32 directory will cause GNU sort to be chosen. - call_command( - command, stdout=open(outfilename, 'w'), shell=(sys.platform=='win32') - ) - finally: - if lc_all_tmp is None: - del os.environ['LC_ALL'] - else: - os.environ['LC_ALL'] = lc_all_tmp - - # On some versions of Windows, os.system() does not return an error - # if the command fails. So add little consistency tests here that - # the output file was created and has the right size: - - if not os.path.exists(outfilename): - raise FatalError('Sort output file missing: %r' % (outfilename,)) - - if os.path.getsize(outfilename) != os.path.getsize(infilename): - raise FatalError( - 'Sort input and output file sizes differ:\n' - ' %r (%d bytes)\n' - ' %r (%d bytes)' % ( - infilename, os.path.getsize(infilename), - outfilename, os.path.getsize(outfilename), - ) - ) - - -class CollectRevsPass(Pass): - """This pass was formerly known as pass1.""" - - def register_artifacts(self): - self._register_temp_file(config.PROJECTS) - self._register_temp_file(config.SYMBOL_STATISTICS) - self._register_temp_file(config.METADATA_INDEX_TABLE) - self._register_temp_file(config.METADATA_STORE) - self._register_temp_file(config.CVS_FILES_DB) - self._register_temp_file(config.CVS_ITEMS_STORE) - Ctx().revision_recorder.register_artifacts(self) - - def run(self, run_options, stats_keeper): - Log().quiet("Examining all CVS ',v' files...") - Ctx()._projects = {} - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_NEW) - cd = CollectData(Ctx().revision_recorder, stats_keeper) - for project in run_options.projects: - cd.process_project(project) - run_options.projects = None - - fatal_errors = cd.close() - - if fatal_errors: - raise FatalException("Pass 1 complete.\n" - + "=" * 75 + "\n" - + "Error summary:\n" - + "\n".join(fatal_errors) + "\n" - + "Exited due to fatal error(s).") - - Ctx()._cvs_file_db.close() - write_projects(artifact_manager.get_temp_file(config.PROJECTS)) - Log().quiet("Done") - - -class CleanMetadataPass(Pass): - """Clean up CVS revision metadata and write it to a new database.""" - - def register_artifacts(self): - self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE) - self._register_temp_file(config.METADATA_CLEAN_STORE) - self._register_temp_file_needed(config.METADATA_INDEX_TABLE) - self._register_temp_file_needed(config.METADATA_STORE) - - def _get_clean_author(self, author): - """Return AUTHOR, converted appropriately to UTF8. - - Raise a UnicodeException if it cannot be converted using the - configured cvs_author_decoder.""" - - try: - return self._authors[author] - except KeyError: - pass - - try: - clean_author = Ctx().cvs_author_decoder(author) - except UnicodeError: - self._authors[author] = author - raise UnicodeError('Problem decoding author \'%s\'' % (author,)) - - try: - clean_author = clean_author.encode('utf8') - except UnicodeError: - self._authors[author] = author - raise UnicodeError('Problem encoding author \'%s\'' % (author,)) - - self._authors[author] = clean_author - return clean_author - - def _get_clean_log_msg(self, log_msg): - """Return LOG_MSG, converted appropriately to UTF8. - - Raise a UnicodeException if it cannot be converted using the - configured cvs_log_decoder.""" - - try: - clean_log_msg = Ctx().cvs_log_decoder(log_msg) - except UnicodeError: - raise UnicodeError( - 'Problem decoding log message:\n' - '%s\n' - '%s\n' - '%s' - % ('-' * 75, log_msg, '-' * 75,) - ) - - try: - return clean_log_msg.encode('utf8') - except UnicodeError: - raise UnicodeError( - 'Problem encoding log message:\n' - '%s\n' - '%s\n' - '%s' - % ('-' * 75, log_msg, '-' * 75,) - ) - - def _clean_metadata(self, metadata): - """Clean up METADATA by overwriting its members as necessary.""" - - try: - metadata.author = self._get_clean_author(metadata.author) - except UnicodeError, e: - Log().warn('%s: %s' % (warning_prefix, e,)) - self.warnings = True - - try: - metadata.log_msg = self._get_clean_log_msg(metadata.log_msg) - except UnicodeError, e: - Log().warn('%s: %s' % (warning_prefix, e,)) - self.warnings = True - - def run(self, run_options, stats_keeper): - Log().quiet("Converting metadata to UTF8...") - metadata_db = MetadataDatabase( - artifact_manager.get_temp_file(config.METADATA_STORE), - artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE), - DB_OPEN_READ, - ) - metadata_clean_db = MetadataDatabase( - artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE), - artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE), - DB_OPEN_NEW, - ) - - self.warnings = False - - # A map {author : clean_author} for those known (to avoid - # repeating warnings): - self._authors = {} - - for id in metadata_db.iterkeys(): - metadata = metadata_db[id] - - # Record the original author name because it might be needed for - # expanding CVS keywords: - metadata.original_author = metadata.author - - self._clean_metadata(metadata) - - metadata_clean_db[id] = metadata - - if self.warnings: - raise FatalError( - 'There were warnings converting author names and/or log messages\n' - 'to Unicode (see messages above). Please restart this pass\n' - 'with one or more \'--encoding\' parameters or with\n' - '\'--fallback-encoding\'.' - ) - - metadata_clean_db.close() - metadata_db.close() - Log().quiet("Done") - - -class CollateSymbolsPass(Pass): - """Divide symbols into branches, tags, and excludes.""" - - conversion_names = { - Trunk : 'trunk', - Branch : 'branch', - Tag : 'tag', - ExcludedSymbol : 'exclude', - Symbol : '.', - } - - def register_artifacts(self): - self._register_temp_file(config.SYMBOL_DB) - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.SYMBOL_STATISTICS) - - def get_symbol(self, run_options, stats): - """Use StrategyRules to decide what to do with a symbol. - - STATS is an instance of symbol_statistics._Stats describing an - instance of Symbol or Trunk. To determine how the symbol is to be - converted, consult the StrategyRules in the project's - symbol_strategy_rules. Each rule is allowed a chance to change - the way the symbol will be converted. If the symbol is not a - Trunk or TypedSymbol after all rules have run, raise - IndeterminateSymbolException.""" - - symbol = stats.lod - rules = run_options.project_symbol_strategy_rules[symbol.project.id] - for rule in rules: - symbol = rule.get_symbol(symbol, stats) - assert symbol is not None - - stats.check_valid(symbol) - - return symbol - - def log_symbol_summary(self, stats, symbol): - if not self.symbol_info_file: - return - - if isinstance(symbol, Trunk): - name = '.trunk.' - preferred_parent_name = '.' - else: - name = stats.lod.name - if symbol.preferred_parent_id is None: - preferred_parent_name = '.' - else: - preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod - if isinstance(preferred_parent, Trunk): - preferred_parent_name = '.trunk.' - else: - preferred_parent_name = preferred_parent.name - - if isinstance(symbol, LineOfDevelopment) and symbol.base_path: - symbol_path = symbol.base_path - else: - symbol_path = '.' - - self.symbol_info_file.write( - '%-5d %-30s %-10s %s %s\n' % ( - stats.lod.project.id, - name, - self.conversion_names[symbol.__class__], - symbol_path, - preferred_parent_name, - ) - ) - self.symbol_info_file.write(' # %s\n' % (stats,)) - parent_counts = stats.possible_parents.items() - if parent_counts: - self.symbol_info_file.write(' # Possible parents:\n') - parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0]))) - for (pp, count) in parent_counts: - if isinstance(pp, Trunk): - self.symbol_info_file.write( - ' # .trunk. : %d\n' % (count,) - ) - else: - self.symbol_info_file.write( - ' # %s : %d\n' % (pp.name, count,) - ) - - def get_symbols(self, run_options): - """Return a map telling how to convert symbols. - - The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)}, - indicating how each symbol should be converted. Trunk objects in - SYMBOL_STATS are passed through unchanged. One object is included - in the return value for each line of development described in - SYMBOL_STATS. - - Raise FatalError if there was an error.""" - - errors = [] - mismatches = [] - - if Ctx().symbol_info_filename is not None: - self.symbol_info_file = open(Ctx().symbol_info_filename, 'w') - self.symbol_info_file.write( - '# Columns: project_id symbol_name conversion symbol_path ' - 'preferred_parent_name\n' - ) - else: - self.symbol_info_file = None - - # Initialize each symbol strategy rule a single time, even if it - # is used in more than one project. First define a map from - # object id to symbol strategy rule: - rules = {} - for rule_list in run_options.project_symbol_strategy_rules: - for rule in rule_list: - rules[id(rule)] = rule - - for rule in rules.itervalues(): - rule.start(self.symbol_stats) - - retval = {} - - for stats in self.symbol_stats: - try: - symbol = self.get_symbol(run_options, stats) - except IndeterminateSymbolException, e: - self.log_symbol_summary(stats, stats.lod) - mismatches.append(e.stats) - except SymbolPlanError, e: - self.log_symbol_summary(stats, stats.lod) - errors.append(e) - else: - self.log_symbol_summary(stats, symbol) - retval[stats.lod] = symbol - - for rule in rules.itervalues(): - rule.finish() - - if self.symbol_info_file: - self.symbol_info_file.close() - - del self.symbol_info_file - - if errors or mismatches: - s = ['Problems determining how symbols should be converted:\n'] - for e in errors: - s.append('%s\n' % (e,)) - if mismatches: - s.append( - 'It is not clear how the following symbols ' - 'should be converted.\n' - 'Use --symbol-hints, --force-tag, --force-branch, --exclude, ' - 'and/or\n' - '--symbol-default to resolve the ambiguity.\n' - ) - for stats in mismatches: - s.append(' %s\n' % (stats,)) - raise FatalError(''.join(s)) - else: - return retval - - def run(self, run_options, stats_keeper): - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - self.symbol_stats = SymbolStatistics( - artifact_manager.get_temp_file(config.SYMBOL_STATISTICS) - ) - - symbol_map = self.get_symbols(run_options) - - # Check the symbols for consistency and bail out if there were errors: - self.symbol_stats.check_consistency(symbol_map) - - # Check that the symbols all have SVN paths set and that the paths - # are disjoint: - Ctx().output_option.check_symbols(symbol_map) - - for symbol in symbol_map.itervalues(): - if isinstance(symbol, ExcludedSymbol): - self.symbol_stats.exclude_symbol(symbol) - - create_symbol_database(symbol_map.values()) - - del self.symbol_stats - - Log().quiet("Done") - - -class FilterSymbolsPass(Pass): - """Delete any branches/tags that are to be excluded. - - Also delete revisions on excluded branches, and delete other - references to the excluded symbols.""" - - def register_artifacts(self): - self._register_temp_file(config.SUMMARY_SERIALIZER) - self._register_temp_file(config.CVS_REVS_SUMMARY_DATAFILE) - self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE) - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.CVS_FILES_DB) - self._register_temp_file_needed(config.CVS_ITEMS_STORE) - Ctx().revision_excluder.register_artifacts(self) - - def run(self, run_options, stats_keeper): - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) - Ctx()._symbol_db = SymbolDatabase() - cvs_item_store = OldCVSItemStore( - artifact_manager.get_temp_file(config.CVS_ITEMS_STORE)) - - cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer) - f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'wb') - cPickle.dump(cvs_item_serializer, f, -1) - f.close() - - rev_db = NewSortableCVSRevisionDatabase( - artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE), - cvs_item_serializer, - ) - - symbol_db = NewSortableCVSSymbolDatabase( - artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE), - cvs_item_serializer, - ) - - revision_excluder = Ctx().revision_excluder - - Log().quiet("Filtering out excluded symbols and summarizing items...") - - stats_keeper.reset_cvs_rev_info() - revision_excluder.start() - - # Process the cvs items store one file at a time: - for cvs_file_items in cvs_item_store.iter_cvs_file_items(): - Log().verbose(cvs_file_items.cvs_file.filename) - cvs_file_items.filter_excluded_symbols(revision_excluder) - cvs_file_items.mutate_symbols() - cvs_file_items.adjust_parents() - cvs_file_items.refine_symbols() - cvs_file_items.record_opened_symbols() - cvs_file_items.record_closed_symbols() - cvs_file_items.check_link_consistency() - - # Store whatever is left to the new file and update statistics: - stats_keeper.record_cvs_file(cvs_file_items.cvs_file) - for cvs_item in cvs_file_items.values(): - stats_keeper.record_cvs_item(cvs_item) - - if isinstance(cvs_item, CVSRevision): - rev_db.add(cvs_item) - elif isinstance(cvs_item, CVSSymbol): - symbol_db.add(cvs_item) - - stats_keeper.set_stats_reflect_exclude(True) - - rev_db.close() - symbol_db.close() - revision_excluder.finish() - cvs_item_store.close() - Ctx()._symbol_db.close() - Ctx()._cvs_file_db.close() - - Log().quiet("Done") - - -class SortRevisionSummaryPass(Pass): - """Sort the revision summary file.""" - - def register_artifacts(self): - self._register_temp_file(config.CVS_REVS_SUMMARY_SORTED_DATAFILE) - self._register_temp_file_needed(config.CVS_REVS_SUMMARY_DATAFILE) - - def run(self, run_options, stats_keeper): - Log().quiet("Sorting CVS revision summaries...") - sort_file( - artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE), - artifact_manager.get_temp_file( - config.CVS_REVS_SUMMARY_SORTED_DATAFILE)) - Log().quiet("Done") - - -class SortSymbolSummaryPass(Pass): - """Sort the symbol summary file.""" - - def register_artifacts(self): - self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE) - self._register_temp_file_needed(config.CVS_SYMBOLS_SUMMARY_DATAFILE) - - def run(self, run_options, stats_keeper): - Log().quiet("Sorting CVS symbol summaries...") - sort_file( - artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE), - artifact_manager.get_temp_file( - config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)) - Log().quiet("Done") - - -class InitializeChangesetsPass(Pass): - """Create preliminary CommitSets.""" - - def register_artifacts(self): - self._register_temp_file(config.CVS_ITEM_TO_CHANGESET) - self._register_temp_file(config.CHANGESETS_STORE) - self._register_temp_file(config.CHANGESETS_INDEX) - self._register_temp_file(config.CVS_ITEMS_SORTED_STORE) - self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE) - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.CVS_FILES_DB) - self._register_temp_file_needed(config.SUMMARY_SERIALIZER) - self._register_temp_file_needed(config.CVS_REVS_SUMMARY_SORTED_DATAFILE) - self._register_temp_file_needed( - config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE) - - def get_revision_changesets(self): - """Generate revision changesets, one at a time. - - Each time, yield a list of CVSRevisions that might potentially - consititute a changeset.""" - - # Create changesets for CVSRevisions: - old_metadata_id = None - old_timestamp = None - changeset_items = [] - - db = OldSortableCVSRevisionDatabase( - artifact_manager.get_temp_file( - config.CVS_REVS_SUMMARY_SORTED_DATAFILE - ), - self.cvs_item_serializer, - ) - - for cvs_rev in db: - if cvs_rev.metadata_id != old_metadata_id \ - or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD: - # Start a new changeset. First finish up the old changeset, - # if any: - if changeset_items: - yield changeset_items - changeset_items = [] - old_metadata_id = cvs_rev.metadata_id - changeset_items.append(cvs_rev) - old_timestamp = cvs_rev.timestamp - - # Finish up the last changeset, if any: - if changeset_items: - yield changeset_items - - def get_symbol_changesets(self): - """Generate symbol changesets, one at a time. - - Each time, yield a list of CVSSymbols that might potentially - consititute a changeset.""" - - old_symbol_id = None - changeset_items = [] - - db = OldSortableCVSSymbolDatabase( - artifact_manager.get_temp_file( - config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE - ), - self.cvs_item_serializer, - ) - - for cvs_symbol in db: - if cvs_symbol.symbol.id != old_symbol_id: - # Start a new changeset. First finish up the old changeset, - # if any: - if changeset_items: - yield changeset_items - changeset_items = [] - old_symbol_id = cvs_symbol.symbol.id - changeset_items.append(cvs_symbol) - - # Finish up the last changeset, if any: - if changeset_items: - yield changeset_items - - @staticmethod - def compare_items(a, b): - return ( - cmp(a.timestamp, b.timestamp) - or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path) - or cmp([int(x) for x in a.rev.split('.')], - [int(x) for x in b.rev.split('.')]) - or cmp(a.id, b.id)) - - def break_internal_dependencies(self, changeset_items): - """Split up CHANGESET_ITEMS if necessary to break internal dependencies. - - CHANGESET_ITEMS is a list of CVSRevisions that could possibly - belong in a single RevisionChangeset, but there might be internal - dependencies among the items. Return a list of lists, where each - sublist is a list of CVSRevisions and at least one internal - dependency has been eliminated. Iff CHANGESET_ITEMS does not have - to be split, then the return value will contain a single value, - namely the original value of CHANGESET_ITEMS. Split - CHANGESET_ITEMS at most once, even though the resulting changesets - might themselves have internal dependencies.""" - - # We only look for succ dependencies, since by doing so we - # automatically cover pred dependencies as well. First create a - # list of tuples (pred, succ) of id pairs for CVSItems that depend - # on each other. - dependencies = [] - changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items]) - for cvs_item in changeset_items: - for next_id in cvs_item.get_succ_ids(): - if next_id in changeset_cvs_item_ids: - # Sanity check: a CVSItem should never depend on itself: - if next_id == cvs_item.id: - raise InternalError('Item depends on itself: %s' % (cvs_item,)) - - dependencies.append((cvs_item.id, next_id,)) - - if dependencies: - # Sort the changeset_items in a defined order (chronological to the - # extent that the timestamps are correct and unique). - changeset_items.sort(self.compare_items) - indexes = {} - for (i, changeset_item) in enumerate(changeset_items): - indexes[changeset_item.id] = i - # How many internal dependencies would be broken by breaking the - # Changeset after a particular index? - breaks = [0] * len(changeset_items) - for (pred, succ,) in dependencies: - pred_index = indexes[pred] - succ_index = indexes[succ] - breaks[min(pred_index, succ_index)] += 1 - breaks[max(pred_index, succ_index)] -= 1 - best_i = None - best_count = -1 - best_time = 0 - for i in range(1, len(breaks)): - breaks[i] += breaks[i - 1] - for i in range(0, len(breaks) - 1): - if breaks[i] > best_count: - best_i = i - best_count = breaks[i] - best_time = (changeset_items[i + 1].timestamp - - changeset_items[i].timestamp) - elif breaks[i] == best_count \ - and (changeset_items[i + 1].timestamp - - changeset_items[i].timestamp) < best_time: - best_i = i - best_count = breaks[i] - best_time = (changeset_items[i + 1].timestamp - - changeset_items[i].timestamp) - # Reuse the old changeset.id for the first of the split changesets. - return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]] - else: - return [changeset_items] - - def break_all_internal_dependencies(self, changeset_items): - """Keep breaking CHANGESET_ITEMS up to break all internal dependencies. - - CHANGESET_ITEMS is a list of CVSRevisions that could conceivably - be part of a single changeset. Break this list into sublists, - where the CVSRevisions in each sublist are free of mutual - dependencies.""" - - # This method is written non-recursively to avoid any possible - # problems with recursion depth. - - changesets_to_split = [changeset_items] - while changesets_to_split: - changesets = self.break_internal_dependencies(changesets_to_split.pop()) - if len(changesets) == 1: - [changeset_items] = changesets - yield changeset_items - else: - # The changeset had to be split; see if either of the - # fragments have to be split: - changesets.reverse() - changesets_to_split.extend(changesets) - - def get_changesets(self): - """Generate (Changeset, [CVSItem,...]) for all changesets. - - The Changesets already have their internal dependencies broken. - The [CVSItem,...] list is the list of CVSItems in the - corresponding Changeset.""" - - for changeset_items in self.get_revision_changesets(): - for split_changeset_items \ - in self.break_all_internal_dependencies(changeset_items): - yield ( - RevisionChangeset( - self.changeset_key_generator.gen_id(), - [cvs_rev.id for cvs_rev in split_changeset_items] - ), - split_changeset_items, - ) - - for changeset_items in self.get_symbol_changesets(): - yield ( - create_symbol_changeset( - self.changeset_key_generator.gen_id(), - changeset_items[0].symbol, - [cvs_symbol.id for cvs_symbol in changeset_items] - ), - changeset_items, - ) - - def run(self, run_options, stats_keeper): - Log().quiet("Creating preliminary commit sets...") - - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) - Ctx()._symbol_db = SymbolDatabase() - - f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'rb') - self.cvs_item_serializer = cPickle.load(f) - f.close() - - changeset_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_INDEX), - DB_OPEN_NEW, - ) - cvs_item_to_changeset_id = CVSItemToChangesetTable( - artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET), - DB_OPEN_NEW, - ) - - self.sorted_cvs_items_db = IndexedCVSItemStore( - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), - DB_OPEN_NEW) - - self.changeset_key_generator = KeyGenerator() - - for (changeset, changeset_items) in self.get_changesets(): - if Log().is_on(Log.DEBUG): - Log().debug(repr(changeset)) - changeset_db.store(changeset) - for cvs_item in changeset_items: - self.sorted_cvs_items_db.add(cvs_item) - cvs_item_to_changeset_id[cvs_item.id] = changeset.id - - self.sorted_cvs_items_db.close() - cvs_item_to_changeset_id.close() - changeset_db.close() - Ctx()._symbol_db.close() - Ctx()._cvs_file_db.close() - - del self.cvs_item_serializer - - Log().quiet("Done") - - -class ProcessedChangesetLogger: - def __init__(self): - self.processed_changeset_ids = [] - - def log(self, changeset_id): - if Log().is_on(Log.DEBUG): - self.processed_changeset_ids.append(changeset_id) - - def flush(self): - if self.processed_changeset_ids: - Log().debug( - 'Consumed changeset ids %s' - % (', '.join(['%x' % id for id in self.processed_changeset_ids]),)) - - del self.processed_changeset_ids[:] - - -class BreakRevisionChangesetCyclesPass(Pass): - """Break up any dependency cycles involving only RevisionChangesets.""" - - def register_artifacts(self): - self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE) - self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX) - self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN) - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.CVS_FILES_DB) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) - self._register_temp_file_needed(config.CHANGESETS_STORE) - self._register_temp_file_needed(config.CHANGESETS_INDEX) - self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET) - - def get_source_changesets(self): - old_changeset_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_INDEX), - DB_OPEN_READ) - - changeset_ids = old_changeset_db.keys() - - for changeset_id in changeset_ids: - yield old_changeset_db[changeset_id] - - old_changeset_db.close() - del old_changeset_db - - def break_cycle(self, cycle): - """Break up one or more changesets in CYCLE to help break the cycle. - - CYCLE is a list of Changesets where - - cycle[i] depends on cycle[i - 1] - - Break up one or more changesets in CYCLE to make progress towards - breaking the cycle. Update self.changeset_graph accordingly. - - It is not guaranteed that the cycle will be broken by one call to - this routine, but at least some progress must be made.""" - - self.processed_changeset_logger.flush() - best_i = None - best_link = None - for i in range(len(cycle)): - # It's OK if this index wraps to -1: - link = ChangesetGraphLink( - cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)]) - - if best_i is None or link < best_link: - best_i = i - best_link = link - - if Log().is_on(Log.DEBUG): - Log().debug( - 'Breaking cycle %s by breaking node %x' % ( - ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]), - best_link.changeset.id,)) - - new_changesets = best_link.break_changeset(self.changeset_key_generator) - - self.changeset_graph.delete_changeset(best_link.changeset) - - for changeset in new_changesets: - self.changeset_graph.add_new_changeset(changeset) - - def run(self, run_options, stats_keeper): - Log().quiet("Breaking revision changeset dependency cycles...") - - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) - Ctx()._symbol_db = SymbolDatabase() - Ctx()._cvs_items_db = IndexedCVSItemStore( - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), - DB_OPEN_READ) - - shutil.copyfile( - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET), - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET_REVBROKEN)) - cvs_item_to_changeset_id = CVSItemToChangesetTable( - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET_REVBROKEN), - DB_OPEN_WRITE) - - changeset_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX), - DB_OPEN_NEW) - - self.changeset_graph = ChangesetGraph( - changeset_db, cvs_item_to_changeset_id - ) - - max_changeset_id = 0 - for changeset in self.get_source_changesets(): - changeset_db.store(changeset) - if isinstance(changeset, RevisionChangeset): - self.changeset_graph.add_changeset(changeset) - max_changeset_id = max(max_changeset_id, changeset.id) - - self.changeset_key_generator = KeyGenerator(max_changeset_id + 1) - - self.processed_changeset_logger = ProcessedChangesetLogger() - - # Consume the graph, breaking cycles using self.break_cycle(): - for (changeset, time_range) in self.changeset_graph.consume_graph( - cycle_breaker=self.break_cycle - ): - self.processed_changeset_logger.log(changeset.id) - - self.processed_changeset_logger.flush() - del self.processed_changeset_logger - - self.changeset_graph.close() - self.changeset_graph = None - Ctx()._cvs_items_db.close() - Ctx()._symbol_db.close() - Ctx()._cvs_file_db.close() - - Log().quiet("Done") - - -class RevisionTopologicalSortPass(Pass): - """Sort RevisionChangesets into commit order. - - Also convert them to OrderedChangesets, without changing their ids.""" - - def register_artifacts(self): - self._register_temp_file(config.CHANGESETS_REVSORTED_STORE) - self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX) - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.CVS_FILES_DB) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) - self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE) - self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX) - self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN) - - def get_source_changesets(self, changeset_db): - changeset_ids = changeset_db.keys() - - for changeset_id in changeset_ids: - yield changeset_db[changeset_id] - - def get_changesets(self): - changeset_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX), - DB_OPEN_READ, - ) - - changeset_graph = ChangesetGraph( - changeset_db, - CVSItemToChangesetTable( - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET_REVBROKEN - ), - DB_OPEN_READ, - ) - ) - - for changeset in self.get_source_changesets(changeset_db): - if isinstance(changeset, RevisionChangeset): - changeset_graph.add_changeset(changeset) - else: - yield changeset - - changeset_ids = [] - - # Sentry: - changeset_ids.append(None) - - for (changeset, time_range) in changeset_graph.consume_graph(): - changeset_ids.append(changeset.id) - - # Sentry: - changeset_ids.append(None) - - for i in range(1, len(changeset_ids) - 1): - changeset = changeset_db[changeset_ids[i]] - yield OrderedChangeset( - changeset.id, changeset.cvs_item_ids, i - 1, - changeset_ids[i - 1], changeset_ids[i + 1]) - - changeset_graph.close() - - def run(self, run_options, stats_keeper): - Log().quiet("Generating CVSRevisions in commit order...") - - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) - Ctx()._symbol_db = SymbolDatabase() - Ctx()._cvs_items_db = IndexedCVSItemStore( - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), - DB_OPEN_READ) - - changesets_revordered_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX), - DB_OPEN_NEW) - - for changeset in self.get_changesets(): - changesets_revordered_db.store(changeset) - - changesets_revordered_db.close() - Ctx()._cvs_items_db.close() - Ctx()._symbol_db.close() - Ctx()._cvs_file_db.close() - - Log().quiet("Done") - - -class BreakSymbolChangesetCyclesPass(Pass): - """Break up any dependency cycles involving only SymbolChangesets.""" - - def register_artifacts(self): - self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE) - self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX) - self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN) - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.CVS_FILES_DB) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) - self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE) - self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX) - self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN) - - def get_source_changesets(self): - old_changeset_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX), - DB_OPEN_READ) - - changeset_ids = old_changeset_db.keys() - - for changeset_id in changeset_ids: - yield old_changeset_db[changeset_id] - - old_changeset_db.close() - - def break_cycle(self, cycle): - """Break up one or more changesets in CYCLE to help break the cycle. - - CYCLE is a list of Changesets where - - cycle[i] depends on cycle[i - 1] - - Break up one or more changesets in CYCLE to make progress towards - breaking the cycle. Update self.changeset_graph accordingly. - - It is not guaranteed that the cycle will be broken by one call to - this routine, but at least some progress must be made.""" - - self.processed_changeset_logger.flush() - best_i = None - best_link = None - for i in range(len(cycle)): - # It's OK if this index wraps to -1: - link = ChangesetGraphLink( - cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)]) - - if best_i is None or link < best_link: - best_i = i - best_link = link - - if Log().is_on(Log.DEBUG): - Log().debug( - 'Breaking cycle %s by breaking node %x' % ( - ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]), - best_link.changeset.id,)) - - new_changesets = best_link.break_changeset(self.changeset_key_generator) - - self.changeset_graph.delete_changeset(best_link.changeset) - - for changeset in new_changesets: - self.changeset_graph.add_new_changeset(changeset) - - def run(self, run_options, stats_keeper): - Log().quiet("Breaking symbol changeset dependency cycles...") - - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) - Ctx()._symbol_db = SymbolDatabase() - Ctx()._cvs_items_db = IndexedCVSItemStore( - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), - DB_OPEN_READ) - - shutil.copyfile( - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET_REVBROKEN), - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)) - cvs_item_to_changeset_id = CVSItemToChangesetTable( - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET_SYMBROKEN), - DB_OPEN_WRITE) - - changeset_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX), - DB_OPEN_NEW) - - self.changeset_graph = ChangesetGraph( - changeset_db, cvs_item_to_changeset_id - ) - - max_changeset_id = 0 - for changeset in self.get_source_changesets(): - changeset_db.store(changeset) - if isinstance(changeset, SymbolChangeset): - self.changeset_graph.add_changeset(changeset) - max_changeset_id = max(max_changeset_id, changeset.id) - - self.changeset_key_generator = KeyGenerator(max_changeset_id + 1) - - self.processed_changeset_logger = ProcessedChangesetLogger() - - # Consume the graph, breaking cycles using self.break_cycle(): - for (changeset, time_range) in self.changeset_graph.consume_graph( - cycle_breaker=self.break_cycle - ): - self.processed_changeset_logger.log(changeset.id) - - self.processed_changeset_logger.flush() - del self.processed_changeset_logger - - self.changeset_graph.close() - self.changeset_graph = None - Ctx()._cvs_items_db.close() - Ctx()._symbol_db.close() - Ctx()._cvs_file_db.close() - - Log().quiet("Done") - - -class BreakAllChangesetCyclesPass(Pass): - """Break up any dependency cycles that are closed by SymbolChangesets.""" - - def register_artifacts(self): - self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE) - self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX) - self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN) - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.CVS_FILES_DB) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) - self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE) - self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX) - self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN) - - def get_source_changesets(self): - old_changeset_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX), - DB_OPEN_READ) - - changeset_ids = old_changeset_db.keys() - - for changeset_id in changeset_ids: - yield old_changeset_db[changeset_id] - - old_changeset_db.close() - - def _split_retrograde_changeset(self, changeset): - """CHANGESET is retrograde. Split it into non-retrograde changesets.""" - - Log().debug('Breaking retrograde changeset %x' % (changeset.id,)) - - self.changeset_graph.delete_changeset(changeset) - - # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) } - ordinal_limits = {} - for cvs_branch in changeset.iter_cvs_items(): - max_pred_ordinal = 0 - min_succ_ordinal = sys.maxint - - for pred_id in cvs_branch.get_pred_ids(): - pred_ordinal = self.ordinals.get( - self.cvs_item_to_changeset_id[pred_id], 0) - max_pred_ordinal = max(max_pred_ordinal, pred_ordinal) - - for succ_id in cvs_branch.get_succ_ids(): - succ_ordinal = self.ordinals.get( - self.cvs_item_to_changeset_id[succ_id], sys.maxint) - min_succ_ordinal = min(min_succ_ordinal, succ_ordinal) - - assert max_pred_ordinal < min_succ_ordinal - ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,) - - # Find the earliest successor ordinal: - min_min_succ_ordinal = sys.maxint - for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values(): - min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal) - - early_item_ids = [] - late_item_ids = [] - for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items(): - if max_pred_ordinal >= min_min_succ_ordinal: - late_item_ids.append(id) - else: - early_item_ids.append(id) - - assert early_item_ids - assert late_item_ids - - early_changeset = changeset.create_split_changeset( - self.changeset_key_generator.gen_id(), early_item_ids) - late_changeset = changeset.create_split_changeset( - self.changeset_key_generator.gen_id(), late_item_ids) - - self.changeset_graph.add_new_changeset(early_changeset) - self.changeset_graph.add_new_changeset(late_changeset) - - early_split = self._split_if_retrograde(early_changeset.id) - - # Because of the way we constructed it, the early changeset should - # not have to be split: - assert not early_split - - self._split_if_retrograde(late_changeset.id) - - def _split_if_retrograde(self, changeset_id): - node = self.changeset_graph[changeset_id] - pred_ordinals = [ - self.ordinals[id] - for id in node.pred_ids - if id in self.ordinals - ] - pred_ordinals.sort() - succ_ordinals = [ - self.ordinals[id] - for id in node.succ_ids - if id in self.ordinals - ] - succ_ordinals.sort() - if pred_ordinals and succ_ordinals \ - and pred_ordinals[-1] >= succ_ordinals[0]: - self._split_retrograde_changeset(self.changeset_db[node.id]) - return True - else: - return False - - def break_segment(self, segment): - """Break a changeset in SEGMENT[1:-1]. - - The range SEGMENT[1:-1] is not empty, and all of the changesets in - that range are SymbolChangesets.""" - - best_i = None - best_link = None - for i in range(1, len(segment) - 1): - link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1]) - - if best_i is None or link < best_link: - best_i = i - best_link = link - - if Log().is_on(Log.DEBUG): - Log().debug( - 'Breaking segment %s by breaking node %x' % ( - ' -> '.join(['%x' % node.id for node in segment]), - best_link.changeset.id,)) - - new_changesets = best_link.break_changeset(self.changeset_key_generator) - - self.changeset_graph.delete_changeset(best_link.changeset) - - for changeset in new_changesets: - self.changeset_graph.add_new_changeset(changeset) - - def break_cycle(self, cycle): - """Break up one or more SymbolChangesets in CYCLE to help break the cycle. - - CYCLE is a list of SymbolChangesets where - - cycle[i] depends on cycle[i - 1] - - . Break up one or more changesets in CYCLE to make progress - towards breaking the cycle. Update self.changeset_graph - accordingly. - - It is not guaranteed that the cycle will be broken by one call to - this routine, but at least some progress must be made.""" - - if Log().is_on(Log.DEBUG): - Log().debug( - 'Breaking cycle %s' % ( - ' -> '.join(['%x' % changeset.id - for changeset in cycle + [cycle[0]]]),)) - - # Unwrap the cycle into a segment then break the segment: - self.break_segment([cycle[-1]] + cycle + [cycle[0]]) - - def run(self, run_options, stats_keeper): - Log().quiet("Breaking CVSSymbol dependency loops...") - - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) - Ctx()._symbol_db = SymbolDatabase() - Ctx()._cvs_items_db = IndexedCVSItemStore( - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), - DB_OPEN_READ) - - shutil.copyfile( - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET_SYMBROKEN), - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)) - self.cvs_item_to_changeset_id = CVSItemToChangesetTable( - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET_ALLBROKEN), - DB_OPEN_WRITE) - - self.changeset_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX), - DB_OPEN_NEW) - - self.changeset_graph = ChangesetGraph( - self.changeset_db, self.cvs_item_to_changeset_id - ) - - # A map {changeset_id : ordinal} for OrderedChangesets: - self.ordinals = {} - # A map {ordinal : changeset_id}: - ordered_changeset_map = {} - # A list of all BranchChangeset ids: - branch_changeset_ids = [] - max_changeset_id = 0 - for changeset in self.get_source_changesets(): - self.changeset_db.store(changeset) - self.changeset_graph.add_changeset(changeset) - if isinstance(changeset, OrderedChangeset): - ordered_changeset_map[changeset.ordinal] = changeset.id - self.ordinals[changeset.id] = changeset.ordinal - elif isinstance(changeset, BranchChangeset): - branch_changeset_ids.append(changeset.id) - max_changeset_id = max(max_changeset_id, changeset.id) - - # An array of ordered_changeset ids, indexed by ordinal: - ordered_changesets = [] - for ordinal in range(len(ordered_changeset_map)): - id = ordered_changeset_map[ordinal] - ordered_changesets.append(id) - - ordered_changeset_ids = set(ordered_changeset_map.values()) - del ordered_changeset_map - - self.changeset_key_generator = KeyGenerator(max_changeset_id + 1) - - # First we scan through all BranchChangesets looking for - # changesets that are individually "retrograde" and splitting - # those up: - for changeset_id in branch_changeset_ids: - self._split_if_retrograde(changeset_id) - - del self.ordinals - - next_ordered_changeset = 0 - - self.processed_changeset_logger = ProcessedChangesetLogger() - - while self.changeset_graph: - # Consume any nodes that don't have predecessors: - for (changeset, time_range) \ - in self.changeset_graph.consume_nopred_nodes(): - self.processed_changeset_logger.log(changeset.id) - if changeset.id in ordered_changeset_ids: - next_ordered_changeset += 1 - ordered_changeset_ids.remove(changeset.id) - - self.processed_changeset_logger.flush() - - if not self.changeset_graph: - break - - # Now work on the next ordered changeset that has not yet been - # processed. BreakSymbolChangesetCyclesPass has broken any - # cycles involving only SymbolChangesets, so the presence of a - # cycle implies that there is at least one ordered changeset - # left in the graph: - assert next_ordered_changeset < len(ordered_changesets) - - id = ordered_changesets[next_ordered_changeset] - path = self.changeset_graph.search_for_path(id, ordered_changeset_ids) - if path: - if Log().is_on(Log.DEBUG): - Log().debug('Breaking path from %s to %s' % (path[0], path[-1],)) - self.break_segment(path) - else: - # There were no ordered changesets among the reachable - # predecessors, so do generic cycle-breaking: - if Log().is_on(Log.DEBUG): - Log().debug( - 'Breaking generic cycle found from %s' - % (self.changeset_db[id],) - ) - self.break_cycle(self.changeset_graph.find_cycle(id)) - - del self.processed_changeset_logger - self.changeset_graph.close() - self.changeset_graph = None - self.cvs_item_to_changeset_id = None - self.changeset_db = None - - Log().quiet("Done") - - -class TopologicalSortPass(Pass): - """Sort changesets into commit order.""" - - def register_artifacts(self): - self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE) - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.CVS_FILES_DB) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) - self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE) - self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX) - self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN) - - def get_source_changesets(self, changeset_db): - for changeset_id in changeset_db.keys(): - yield changeset_db[changeset_id] - - def get_changesets(self): - """Generate (changeset, timestamp) pairs in commit order.""" - - changeset_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX), - DB_OPEN_READ) - - changeset_graph = ChangesetGraph( - changeset_db, - CVSItemToChangesetTable( - artifact_manager.get_temp_file( - config.CVS_ITEM_TO_CHANGESET_ALLBROKEN - ), - DB_OPEN_READ, - ), - ) - symbol_changeset_ids = set() - - for changeset in self.get_source_changesets(changeset_db): - changeset_graph.add_changeset(changeset) - if isinstance(changeset, SymbolChangeset): - symbol_changeset_ids.add(changeset.id) - - # Ensure a monotonically-increasing timestamp series by keeping - # track of the previous timestamp and ensuring that the following - # one is larger. - timestamper = Timestamper() - - for (changeset, time_range) in changeset_graph.consume_graph(): - timestamp = timestamper.get( - time_range.t_max, changeset.id in symbol_changeset_ids - ) - yield (changeset, timestamp) - - changeset_graph.close() - - def run(self, run_options, stats_keeper): - Log().quiet("Generating CVSRevisions in commit order...") - - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) - Ctx()._symbol_db = SymbolDatabase() - Ctx()._cvs_items_db = IndexedCVSItemStore( - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), - DB_OPEN_READ) - - sorted_changesets = open( - artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE), - 'w') - - for (changeset, timestamp) in self.get_changesets(): - sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,)) - - sorted_changesets.close() - - Ctx()._cvs_items_db.close() - Ctx()._symbol_db.close() - Ctx()._cvs_file_db.close() - - Log().quiet("Done") - - -class CreateRevsPass(Pass): - """Generate the SVNCommit <-> CVSRevision mapping databases. - - SVNCommitCreator also calls SymbolingsLogger to register - CVSRevisions that represent an opening or closing for a path on a - branch or tag. See SymbolingsLogger for more details. - - This pass was formerly known as pass5.""" - - def register_artifacts(self): - self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE) - self._register_temp_file(config.SVN_COMMITS_STORE) - self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS) - self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS) - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.CVS_FILES_DB) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE) - self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX) - self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE) - - def get_changesets(self): - """Generate (changeset,timestamp,) tuples in commit order.""" - - changeset_db = ChangesetDatabase( - artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE), - artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX), - DB_OPEN_READ) - - for line in file( - artifact_manager.get_temp_file( - config.CHANGESETS_SORTED_DATAFILE)): - [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()] - yield (changeset_db[changeset_id], timestamp) - - changeset_db.close() - - def get_svn_commits(self, creator): - """Generate the SVNCommits, in order.""" - - for (changeset, timestamp) in self.get_changesets(): - for svn_commit in creator.process_changeset(changeset, timestamp): - yield svn_commit - - def log_svn_commit(self, svn_commit): - """Output information about SVN_COMMIT.""" - - Log().normal( - 'Creating Subversion r%d (%s)' - % (svn_commit.revnum, svn_commit.get_description(),) - ) - - if isinstance(svn_commit, SVNRevisionCommit): - for cvs_rev in svn_commit.cvs_revs: - Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,)) - - def run(self, run_options, stats_keeper): - Log().quiet("Mapping CVS revisions to Subversion commits...") - - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) - Ctx()._symbol_db = SymbolDatabase() - Ctx()._cvs_items_db = IndexedCVSItemStore( - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), - DB_OPEN_READ) - - Ctx()._symbolings_logger = SymbolingsLogger() - - persistence_manager = PersistenceManager(DB_OPEN_NEW) - - creator = SVNCommitCreator() - for svn_commit in self.get_svn_commits(creator): - self.log_svn_commit(svn_commit) - persistence_manager.put_svn_commit(svn_commit) - - stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id()) - del creator - - persistence_manager.close() - Ctx()._symbolings_logger.close() - Ctx()._cvs_items_db.close() - Ctx()._symbol_db.close() - Ctx()._cvs_file_db.close() - - Log().quiet("Done") - - -class SortSymbolsPass(Pass): - """This pass was formerly known as pass6.""" - - def register_artifacts(self): - self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED) - self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS) - - def run(self, run_options, stats_keeper): - Log().quiet("Sorting symbolic name source revisions...") - - sort_file( - artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS), - artifact_manager.get_temp_file( - config.SYMBOL_OPENINGS_CLOSINGS_SORTED), - options=['-k', '1,1', '-k', '2,2n', '-k', '3'], - ) - Log().quiet("Done") - - -class IndexSymbolsPass(Pass): - """This pass was formerly known as pass7.""" - - def register_artifacts(self): - self._register_temp_file(config.SYMBOL_OFFSETS_DB) - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED) - - def generate_offsets_for_symbolings(self): - """This function iterates through all the lines in - SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping - SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED - where SYMBOLIC_NAME is first encountered. This will allow us to - seek to the various offsets in the file and sequentially read only - the openings and closings that we need.""" - - offsets = {} - - f = open( - artifact_manager.get_temp_file( - config.SYMBOL_OPENINGS_CLOSINGS_SORTED), - 'r') - old_id = None - while True: - fpos = f.tell() - line = f.readline() - if not line: - break - id, svn_revnum, ignored = line.split(" ", 2) - id = int(id, 16) - if id != old_id: - Log().verbose(' ', Ctx()._symbol_db.get_symbol(id).name) - old_id = id - offsets[id] = fpos - - f.close() - - offsets_db = file( - artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb') - cPickle.dump(offsets, offsets_db, -1) - offsets_db.close() - - def run(self, run_options, stats_keeper): - Log().quiet("Determining offsets for all symbolic names...") - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._symbol_db = SymbolDatabase() - self.generate_offsets_for_symbolings() - Ctx()._symbol_db.close() - Log().quiet("Done.") - - -class OutputPass(Pass): - """This pass was formerly known as pass8.""" - - def register_artifacts(self): - self._register_temp_file_needed(config.PROJECTS) - self._register_temp_file_needed(config.CVS_FILES_DB) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) - self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) - self._register_temp_file_needed(config.SYMBOL_DB) - self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE) - self._register_temp_file_needed(config.METADATA_CLEAN_STORE) - self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE) - self._register_temp_file_needed(config.SVN_COMMITS_STORE) - self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS) - Ctx().output_option.register_artifacts(self) - - def get_svn_commits(self): - """Generate the SVNCommits in commit order.""" - - persistence_manager = PersistenceManager(DB_OPEN_READ) - - svn_revnum = 1 # The first non-trivial commit - - # Peek at the first revision to find the date to use to initialize - # the repository: - svn_commit = persistence_manager.get_svn_commit(svn_revnum) - - while svn_commit: - yield svn_commit - svn_revnum += 1 - svn_commit = persistence_manager.get_svn_commit(svn_revnum) - - persistence_manager.close() - - def run(self, run_options, stats_keeper): - Ctx()._projects = read_projects( - artifact_manager.get_temp_file(config.PROJECTS) - ) - Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) - Ctx()._metadata_db = MetadataDatabase( - artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE), - artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE), - DB_OPEN_READ, - ) - Ctx()._cvs_items_db = IndexedCVSItemStore( - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), - artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), - DB_OPEN_READ) - Ctx()._symbol_db = SymbolDatabase() - - Ctx().output_option.setup(stats_keeper.svn_rev_count()) - - for svn_commit in self.get_svn_commits(): - svn_commit.output(Ctx().output_option) - - Ctx().output_option.cleanup() - - Ctx()._symbol_db.close() - Ctx()._cvs_items_db.close() - Ctx()._metadata_db.close() - Ctx()._cvs_file_db.close() - - -# The list of passes constituting a run of cvs2svn: -passes = [ - CollectRevsPass(), - CleanMetadataPass(), - CollateSymbolsPass(), - #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE), - FilterSymbolsPass(), - SortRevisionSummaryPass(), - SortSymbolSummaryPass(), - InitializeChangesetsPass(), - #CheckIndexedItemStoreDependenciesPass( - # config.CVS_ITEMS_SORTED_STORE, - # config.CVS_ITEMS_SORTED_INDEX_TABLE), - BreakRevisionChangesetCyclesPass(), - RevisionTopologicalSortPass(), - BreakSymbolChangesetCyclesPass(), - BreakAllChangesetCyclesPass(), - TopologicalSortPass(), - CreateRevsPass(), - SortSymbolsPass(), - IndexSymbolsPass(), - OutputPass(), - ] - - diff --git a/cvs2svn_lib/persistence_manager.py b/cvs2svn_lib/persistence_manager.py deleted file mode 100644 index 8a622ab..0000000 --- a/cvs2svn_lib/persistence_manager.py +++ /dev/null @@ -1,106 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains class PersistenceManager.""" - - -from cvs2svn_lib import config -from cvs2svn_lib.common import DB_OPEN_NEW -from cvs2svn_lib.common import DB_OPEN_READ -from cvs2svn_lib.common import SVN_INVALID_REVNUM -from cvs2svn_lib.artifact_manager import artifact_manager -from cvs2svn_lib.record_table import SignedIntegerPacker -from cvs2svn_lib.record_table import RecordTable -from cvs2svn_lib.serializer import PrimedPickleSerializer -from cvs2svn_lib.database import IndexedDatabase -from cvs2svn_lib.svn_commit import SVNRevisionCommit -from cvs2svn_lib.svn_commit import SVNInitialProjectCommit -from cvs2svn_lib.svn_commit import SVNPrimaryCommit -from cvs2svn_lib.svn_commit import SVNBranchCommit -from cvs2svn_lib.svn_commit import SVNTagCommit -from cvs2svn_lib.svn_commit import SVNPostCommit - - -class PersistenceManager: - """The PersistenceManager allows us to effectively store SVNCommits - to disk and retrieve them later using only their subversion revision - number as the key. It also returns the subversion revision number - for a given CVSRevision's unique key. - - All information pertinent to each SVNCommit is stored in a series of - on-disk databases so that SVNCommits can be retrieved on-demand. - - MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ. - In 'new' mode, PersistenceManager will initialize a new set of on-disk - databases and be fully-featured. - In 'read' mode, PersistenceManager will open existing on-disk databases - and the set_* methods will be unavailable.""" - - def __init__(self, mode): - self.mode = mode - if mode not in (DB_OPEN_NEW, DB_OPEN_READ): - raise RuntimeError, "Invalid 'mode' argument to PersistenceManager" - primer = ( - SVNInitialProjectCommit, - SVNPrimaryCommit, - SVNPostCommit, - SVNBranchCommit, - SVNTagCommit, - ) - serializer = PrimedPickleSerializer(primer) - self.svn_commit_db = IndexedDatabase( - artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE), - artifact_manager.get_temp_file(config.SVN_COMMITS_STORE), - mode, serializer) - self.cvs2svn_db = RecordTable( - artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS), - mode, SignedIntegerPacker(SVN_INVALID_REVNUM)) - - def get_svn_revnum(self, cvs_rev_id): - """Return the Subversion revision number in which CVS_REV_ID was - committed, or SVN_INVALID_REVNUM if there is no mapping for - CVS_REV_ID.""" - - return self.cvs2svn_db.get(cvs_rev_id, SVN_INVALID_REVNUM) - - def get_svn_commit(self, svn_revnum): - """Return an SVNCommit that corresponds to SVN_REVNUM. - - If no SVNCommit exists for revnum SVN_REVNUM, then return None.""" - - return self.svn_commit_db.get(svn_revnum, None) - - def put_svn_commit(self, svn_commit): - """Record the bidirectional mapping between SVN_REVNUM and - CVS_REVS and record associated attributes.""" - - if self.mode == DB_OPEN_READ: - raise RuntimeError, \ - 'Write operation attempted on read-only PersistenceManager' - - self.svn_commit_db[svn_commit.revnum] = svn_commit - - if isinstance(svn_commit, SVNRevisionCommit): - for cvs_rev in svn_commit.cvs_revs: - self.cvs2svn_db[cvs_rev.id] = svn_commit.revnum - - def close(self): - self.cvs2svn_db.close() - self.cvs2svn_db = None - self.svn_commit_db.close() - self.svn_commit_db = None - - diff --git a/cvs2svn_lib/process.py b/cvs2svn_lib/process.py deleted file mode 100644 index 56469ce..0000000 --- a/cvs2svn_lib/process.py +++ /dev/null @@ -1,116 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains generic utilities used by cvs2svn.""" - - -import subprocess - -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import CommandError - - -def call_command(command, **kw): - """Call the specified command, checking that it exits successfully. - - Raise a FatalError if the command cannot be executed, or if it exits - with a non-zero exit code. Pass KW as keyword arguments to - subprocess.call().""" - - try: - retcode = subprocess.call(command, **kw) - if retcode < 0: - raise FatalError( - 'Command terminated by signal %d: "%s"' - % (-retcode, ' '.join(command),) - ) - elif retcode > 0: - raise FatalError( - 'Command failed with return code %d: "%s"' - % (retcode, ' '.join(command),) - ) - except OSError, e: - raise FatalError( - 'Command execution failed (%s): "%s"' - % (e, ' '.join(command),) - ) - - -class CommandFailedException(Exception): - """Exception raised if check_command_runs() fails.""" - - pass - - -def check_command_runs(cmd, cmdname): - """Check whether the command CMD can be executed without errors. - - CMD is a list or string, as accepted by subprocess.Popen(). CMDNAME - is the name of the command as it should be included in exception - error messages. - - This function checks three things: (1) the command can be run - without throwing an OSError; (2) it exits with status=0; (3) it - doesn't output anything to stderr. If any of these conditions is - not met, raise a CommandFailedException describing the problem.""" - - try: - pipe = subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - except OSError, e: - raise CommandFailedException('error executing %s: %s' % (cmdname, e,)) - pipe.stdin.close() - pipe.stdout.read() - errmsg = pipe.stderr.read() - status = pipe.wait() - if status or errmsg: - msg = 'error executing %s: status %s' % (cmdname, status,) - if errmsg: - msg += ', error output:\n%s' % (errmsg,) - raise CommandFailedException(msg) - - -class PipeStream(object): - """A file-like object from which revision contents can be read.""" - - def __init__(self, pipe_command): - self._pipe_command_str = ' '.join(pipe_command) - self.pipe = subprocess.Popen( - pipe_command, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - self.pipe.stdin.close() - - def read(self, size=None): - if size is None: - return self.pipe.stdout.read() - else: - return self.pipe.stdout.read(size) - - def close(self): - self.pipe.stdout.close() - error_output = self.pipe.stderr.read() - exit_status = self.pipe.wait() - if exit_status: - raise CommandError(self._pipe_command_str, exit_status, error_output) - - diff --git a/cvs2svn_lib/project.py b/cvs2svn_lib/project.py deleted file mode 100644 index 0fe92df..0000000 --- a/cvs2svn_lib/project.py +++ /dev/null @@ -1,219 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains database facilities used by cvs2svn.""" - - -import re -import os -import cPickle - -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import IllegalSVNPathError -from cvs2svn_lib.common import normalize_svn_path -from cvs2svn_lib.common import verify_paths_disjoint -from cvs2svn_lib.symbol_transform import CompoundSymbolTransform - - -class FileInAndOutOfAtticException(Exception): - def __init__(self, non_attic_path, attic_path): - Exception.__init__( - self, - "A CVS repository cannot contain both %s and %s" - % (non_attic_path, attic_path)) - - self.non_attic_path = non_attic_path - self.attic_path = attic_path - - -def normalize_ttb_path(opt, path, allow_empty=False): - try: - return normalize_svn_path(path, allow_empty) - except IllegalSVNPathError, e: - raise FatalError('Problem with %s: %s' % (opt, e,)) - - -class Project(object): - """A project within a CVS repository.""" - - def __init__( - self, id, project_cvs_repos_path, - initial_directories=[], - symbol_transforms=None, - ): - """Create a new Project record. - - ID is a unique id for this project. PROJECT_CVS_REPOS_PATH is the - main CVS directory for this project (within the filesystem). - - INITIAL_DIRECTORIES is an iterable of all SVN directories that - should be created when the project is first created. Normally, - this should include the trunk, branches, and tags directory. - - SYMBOL_TRANSFORMS is an iterable of SymbolTransform instances - which will be used to transform any symbol names within this - project.""" - - self.id = id - - self.project_cvs_repos_path = os.path.normpath(project_cvs_repos_path) - if not os.path.isdir(self.project_cvs_repos_path): - raise FatalError("The specified CVS repository path '%s' is not an " - "existing directory." % self.project_cvs_repos_path) - - self.cvs_repository_root, self.cvs_module = \ - self.determine_repository_root( - os.path.abspath(self.project_cvs_repos_path)) - - # A regexp matching project_cvs_repos_path plus an optional separator: - self.project_prefix_re = re.compile( - r'^' + re.escape(self.project_cvs_repos_path) - + r'(' + re.escape(os.sep) + r'|$)') - - # The SVN directories to add when the project is first created: - self._initial_directories = [] - - for path in initial_directories: - try: - path = normalize_svn_path(path, False) - except IllegalSVNPathError, e: - raise FatalError( - 'Initial directory %r is not a legal SVN path: %s' - % (path, e,) - ) - self._initial_directories.append(path) - - verify_paths_disjoint(*self._initial_directories) - - # A list of transformation rules (regexp, replacement) applied to - # symbol names in this project. - if symbol_transforms is None: - symbol_transforms = [] - - self.symbol_transform = CompoundSymbolTransform(symbol_transforms) - - # The ID of the Trunk instance for this Project. This member is - # filled in during CollectRevsPass. - self.trunk_id = None - - # The ID of the CVSDirectory representing the root directory of - # this project. This member is filled in during CollectRevsPass. - self.root_cvs_directory_id = None - - def __eq__(self, other): - return self.id == other.id - - def __cmp__(self, other): - return cmp(self.cvs_module, other.cvs_module) \ - or cmp(self.id, other.id) - - def __hash__(self): - return self.id - - @staticmethod - def determine_repository_root(path): - """Ascend above the specified PATH if necessary to find the - cvs_repository_root (a directory containing a CVSROOT directory) - and the cvs_module (the path of the conversion root within the cvs - repository). Return the root path and the module path of this - project relative to the root. - - NB: cvs_module must be seperated by '/', *not* by os.sep.""" - - def is_cvs_repository_root(path): - return os.path.isdir(os.path.join(path, 'CVSROOT')) - - original_path = path - cvs_module = '' - while not is_cvs_repository_root(path): - # Step up one directory: - prev_path = path - path, module_component = os.path.split(path) - if path == prev_path: - # Hit the root (of the drive, on Windows) without finding a - # CVSROOT dir. - raise FatalError( - "the path '%s' is not a CVS repository, nor a path " - "within a CVS repository. A CVS repository contains " - "a CVSROOT directory within its root directory." - % (original_path,)) - - cvs_module = module_component + "/" + cvs_module - - return path, cvs_module - - def transform_symbol(self, cvs_file, symbol_name, revision): - """Transform the symbol SYMBOL_NAME. - - SYMBOL_NAME refers to revision number REVISION in CVS_FILE. - REVISION is the CVS revision number as a string, with zeros - removed (e.g., '1.7' or '1.7.2'). Use the renaming rules - specified with --symbol-transform to possibly rename the symbol. - Return the transformed symbol name, the original name if it should - not be transformed, or None if the symbol should be omitted from - the conversion.""" - - return self.symbol_transform.transform(cvs_file, symbol_name, revision) - - def get_trunk(self): - """Return the Trunk instance for this project. - - This method can only be called after self.trunk_id has been - initialized in CollectRevsPass.""" - - return Ctx()._symbol_db.get_symbol(self.trunk_id) - - def get_root_cvs_directory(self): - """Return the root CVSDirectory instance for this project. - - This method can only be called after self.root_cvs_directory_id - has been initialized in CollectRevsPass.""" - - return Ctx()._cvs_file_db.get_file(self.root_cvs_directory_id) - - def get_initial_directories(self): - """Generate the project's initial SVN directories. - - Yield as strings the SVN paths of directories that should be - created when the project is first created.""" - - # Yield the path of the Trunk symbol for this project (which might - # differ from the one passed to the --trunk option because of - # SymbolStrategyRules). The trunk path might be '' during a - # trunk-only conversion, but that is OK because DumpfileDelegate - # considers that directory to exist already and will therefore - # ignore it: - yield self.get_trunk().base_path - - for path in self._initial_directories: - yield path - - def __str__(self): - return self.project_cvs_repos_path - - -def read_projects(filename): - retval = {} - for project in cPickle.load(open(filename, 'rb')): - retval[project.id] = project - return retval - - -def write_projects(filename): - cPickle.dump(Ctx()._projects.values(), open(filename, 'wb'), -1) - - diff --git a/cvs2svn_lib/property_setters.py b/cvs2svn_lib/property_setters.py deleted file mode 100644 index 7cf379e..0000000 --- a/cvs2svn_lib/property_setters.py +++ /dev/null @@ -1,385 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes to set Subversion properties on files.""" - - -import os -import re -import fnmatch -import ConfigParser -from cStringIO import StringIO - -from cvs2svn_lib.common import warning_prefix -from cvs2svn_lib.log import Log - - -class SVNPropertySetter: - """Abstract class for objects that can set properties on a SVNCommitItem.""" - - def set_properties(self, s_item): - """Set any properties that can be determined for S_ITEM. - - S_ITEM is an instance of SVNCommitItem. This method should modify - S_ITEM.svn_props in place.""" - - raise NotImplementedError - - -class CVSRevisionNumberSetter(SVNPropertySetter): - """Set the cvs2svn:cvs-rev property to the CVS revision number.""" - - propname = 'cvs2svn:cvs-rev' - - def set_properties(self, s_item): - if self.propname in s_item.svn_props: - return - - s_item.svn_props[self.propname] = s_item.cvs_rev.rev - s_item.svn_props_changed = True - - -class ExecutablePropertySetter(SVNPropertySetter): - """Set the svn:executable property based on cvs_rev.cvs_file.executable.""" - - propname = 'svn:executable' - - def set_properties(self, s_item): - if self.propname in s_item.svn_props: - return - - if s_item.cvs_rev.cvs_file.executable: - s_item.svn_props[self.propname] = '*' - - -class CVSBinaryFileEOLStyleSetter(SVNPropertySetter): - """Set the eol-style to None for files with CVS mode '-kb'.""" - - propname = 'svn:eol-style' - - def set_properties(self, s_item): - if self.propname in s_item.svn_props: - return - - if s_item.cvs_rev.cvs_file.mode == 'b': - s_item.svn_props[self.propname] = None - - -class MimeMapper(SVNPropertySetter): - """A class that provides mappings from file names to MIME types.""" - - propname = 'svn:mime-type' - - def __init__(self, mime_types_file): - self.mappings = { } - - for line in file(mime_types_file): - if line.startswith("#"): - continue - - # format of a line is something like - # text/plain c h cpp - extensions = line.split() - if len(extensions) < 2: - continue - type = extensions.pop(0) - for ext in extensions: - if ext in self.mappings and self.mappings[ext] != type: - Log().error( - "%s: ambiguous MIME mapping for *.%s (%s or %s)\n" - % (warning_prefix, ext, self.mappings[ext], type) - ) - self.mappings[ext] = type - - def set_properties(self, s_item): - if self.propname in s_item.svn_props: - return - - basename, extension = os.path.splitext(s_item.cvs_rev.cvs_file.basename) - - # Extension includes the dot, so strip it (will leave extension - # empty if filename ends with a dot, which is ok): - extension = extension[1:] - - # If there is no extension (or the file ends with a period), use - # the base name for mapping. This allows us to set mappings for - # files such as README or Makefile: - if not extension: - extension = basename - - mime_type = self.mappings.get(extension, None) - if mime_type is not None: - s_item.svn_props[self.propname] = mime_type - - -class AutoPropsPropertySetter(SVNPropertySetter): - """Set arbitrary svn properties based on an auto-props configuration. - - This class supports case-sensitive or case-insensitive pattern - matching. The command-line default is case-insensitive behavior, - consistent with Subversion (see - http://subversion.tigris.org/issues/show_bug.cgi?id=2036). - - As a special extension to Subversion's auto-props handling, if a - property name is preceded by a '!' then that property is forced to - be left unset. - - If a property specified in auto-props has already been set to a - different value, print a warning and leave the old property value - unchanged. - - Python's treatment of whitespaces in the ConfigParser module is - buggy and inconsistent. Usually spaces are preserved, but if there - is at least one semicolon in the value, and the *first* semicolon is - preceded by a space, then that is treated as the start of a comment - and the rest of the line is silently discarded.""" - - property_name_pattern = r'(?P[^\!\=\s]+)' - property_unset_re = re.compile( - r'^\!\s*' + property_name_pattern + r'$' - ) - property_set_re = re.compile( - r'^' + property_name_pattern + r'\s*\=\s*(?P.*)$' - ) - property_novalue_re = re.compile( - r'^' + property_name_pattern + r'$' - ) - - quoted_re = re.compile( - r'^([\'\"]).*\1$' - ) - comment_re = re.compile(r'\s;') - - class Pattern: - """Describes the properties to be set for files matching a pattern.""" - - def __init__(self, pattern, propdict): - # A glob-like pattern: - self.pattern = pattern - # A dictionary of properties that should be set: - self.propdict = propdict - - def match(self, basename): - """Does the file with the specified basename match pattern?""" - - return fnmatch.fnmatch(basename, self.pattern) - - def __init__(self, configfilename, ignore_case=True): - config = ConfigParser.ConfigParser() - if ignore_case: - self.transform_case = self.squash_case - else: - config.optionxform = self.preserve_case - self.transform_case = self.preserve_case - - configtext = open(configfilename).read() - if self.comment_re.search(configtext): - Log().warn( - '%s: Please be aware that a space followed by a\n' - 'semicolon is sometimes treated as a comment in configuration\n' - 'files. This pattern was seen in\n' - ' %s\n' - 'Please make sure that you have not inadvertently commented\n' - 'out part of an important line.' - % (warning_prefix, configfilename,) - ) - - config.readfp(StringIO(configtext), configfilename) - self.patterns = [] - sections = config.sections() - sections.sort() - for section in sections: - if self.transform_case(section) == 'auto-props': - patterns = config.options(section) - patterns.sort() - for pattern in patterns: - value = config.get(section, pattern) - if value: - self._add_pattern(pattern, value) - - def squash_case(self, s): - return s.lower() - - def preserve_case(self, s): - return s - - def _add_pattern(self, pattern, props): - propdict = {} - if self.quoted_re.match(pattern): - Log().warn( - '%s: Quoting is not supported in auto-props; please verify rule\n' - 'for %r. (Using pattern including quotation marks.)\n' - % (warning_prefix, pattern,) - ) - for prop in props.split(';'): - prop = prop.strip() - m = self.property_unset_re.match(prop) - if m: - name = m.group('name') - Log().debug( - 'auto-props: For %r, leaving %r unset.' % (pattern, name,) - ) - propdict[name] = None - continue - - m = self.property_set_re.match(prop) - if m: - name = m.group('name') - value = m.group('value') - if self.quoted_re.match(value): - Log().warn( - '%s: Quoting is not supported in auto-props; please verify\n' - 'rule %r for pattern %r. (Using value\n' - 'including quotation marks.)\n' - % (warning_prefix, prop, pattern,) - ) - Log().debug( - 'auto-props: For %r, setting %r to %r.' % (pattern, name, value,) - ) - propdict[name] = value - continue - - m = self.property_novalue_re.match(prop) - if m: - name = m.group('name') - Log().debug( - 'auto-props: For %r, setting %r to the empty string' - % (pattern, name,) - ) - propdict[name] = '' - continue - - Log().warn( - '%s: in auto-props line for %r, value %r cannot be parsed (ignored)' - % (warning_prefix, pattern, prop,) - ) - - self.patterns.append(self.Pattern(self.transform_case(pattern), propdict)) - - def get_propdict(self, cvs_file): - basename = self.transform_case(cvs_file.basename) - propdict = {} - for pattern in self.patterns: - if pattern.match(basename): - for (key,value) in pattern.propdict.items(): - if key in propdict: - if propdict[key] != value: - Log().warn( - "Contradictory values set for property '%s' for file %s." - % (key, cvs_file,)) - else: - propdict[key] = value - - return propdict - - def set_properties(self, s_item): - propdict = self.get_propdict(s_item.cvs_rev.cvs_file) - for (k,v) in propdict.items(): - if k in s_item.svn_props: - if s_item.svn_props[k] != v: - Log().warn( - "Property '%s' already set to %r for file %s; " - "auto-props value (%r) ignored." - % (k, s_item.svn_props[k], s_item.cvs_rev.cvs_path, v,)) - else: - s_item.svn_props[k] = v - - -class CVSBinaryFileDefaultMimeTypeSetter(SVNPropertySetter): - """If the file is binary and its svn:mime-type property is not yet - set, set it to 'application/octet-stream'.""" - - propname = 'svn:mime-type' - - def set_properties(self, s_item): - if self.propname in s_item.svn_props: - return - - if s_item.cvs_rev.cvs_file.mode == 'b': - s_item.svn_props[self.propname] = 'application/octet-stream' - - -class EOLStyleFromMimeTypeSetter(SVNPropertySetter): - """Set svn:eol-style based on svn:mime-type. - - If svn:mime-type is known but svn:eol-style is not, then set - svn:eol-style based on svn:mime-type as follows: if svn:mime-type - starts with 'text/', then set svn:eol-style to native; otherwise, - force it to remain unset. See also issue #39.""" - - propname = 'svn:eol-style' - - def set_properties(self, s_item): - if self.propname in s_item.svn_props: - return - - if s_item.svn_props.get('svn:mime-type', None) is not None: - if s_item.svn_props['svn:mime-type'].startswith("text/"): - s_item.svn_props[self.propname] = 'native' - else: - s_item.svn_props[self.propname] = None - - -class DefaultEOLStyleSetter(SVNPropertySetter): - """Set the eol-style if one has not already been set.""" - - propname = 'svn:eol-style' - - def __init__(self, value): - """Initialize with the specified default VALUE.""" - - self.value = value - - def set_properties(self, s_item): - if self.propname in s_item.svn_props: - return - - s_item.svn_props[self.propname] = self.value - - -class SVNBinaryFileKeywordsPropertySetter(SVNPropertySetter): - """Turn off svn:keywords for files with binary svn:eol-style.""" - - propname = 'svn:keywords' - - def set_properties(self, s_item): - if self.propname in s_item.svn_props: - return - - if not s_item.svn_props.get('svn:eol-style'): - s_item.svn_props[self.propname] = None - - -class KeywordsPropertySetter(SVNPropertySetter): - """If the svn:keywords property is not yet set, set it based on the - file's mode. See issue #2.""" - - propname = 'svn:keywords' - - def __init__(self, value): - """Use VALUE for the value of the svn:keywords property if it is - to be set.""" - - self.value = value - - def set_properties(self, s_item): - if self.propname in s_item.svn_props: - return - - if s_item.cvs_rev.cvs_file.mode in [None, 'kv', 'kvl']: - s_item.svn_props[self.propname] = self.value - - diff --git a/cvs2svn_lib/rcs_revision_manager.py b/cvs2svn_lib/rcs_revision_manager.py deleted file mode 100644 index 1c2dfcf..0000000 --- a/cvs2svn_lib/rcs_revision_manager.py +++ /dev/null @@ -1,51 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Access the CVS repository via RCS's 'co' command.""" - - -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.process import check_command_runs -from cvs2svn_lib.process import PipeStream -from cvs2svn_lib.process import CommandFailedException -from cvs2svn_lib.revision_manager import RevisionReader - - -class RCSRevisionReader(RevisionReader): - """A RevisionReader that reads the contents via RCS.""" - - def __init__(self, co_executable): - self.co_executable = co_executable - try: - check_command_runs([self.co_executable, '-V'], self.co_executable) - except CommandFailedException, e: - raise FatalError('%s\n' - 'Please check that co is installed and in your PATH\n' - '(it is a part of the RCS software).' % (e,)) - - def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False): - pipe_cmd = [ - self.co_executable, - '-q', - '-x,v', - '-p%s' % (cvs_rev.rev,) - ] - if suppress_keyword_substitution: - pipe_cmd.append('-kk') - pipe_cmd.append(cvs_rev.cvs_file.filename) - return PipeStream(pipe_cmd) - - diff --git a/cvs2svn_lib/rcs_stream.py b/cvs2svn_lib/rcs_stream.py deleted file mode 100644 index b893819..0000000 --- a/cvs2svn_lib/rcs_stream.py +++ /dev/null @@ -1,149 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2007 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module processes RCS diffs (deltas).""" - - -import re - -def msplit(s): - """Split S into an array of lines. - - Only \n is a line separator. The line endings are part of the lines.""" - - # return s.splitlines(True) clobbers \r - re = [ i + "\n" for i in s.split("\n") ] - re[-1] = re[-1][:-1] - if not re[-1]: - del re[-1] - return re - - -class MalformedDeltaException(Exception): - """A malformed RCS delta was encountered.""" - - pass - -class RCSStream: - """This class represents a single file object to which RCS deltas can be - applied in various ways.""" - - ad_command = re.compile(r'^([ad])(\d+)\s(\d+)\n$') - a_command = re.compile(r'^a(\d+)\s(\d+)\n$') - - def __init__(self, text): - """Instantiate and initialize the file content with TEXT.""" - - self._texts = msplit(text) - - def get_text(self): - """Return the current file content.""" - - return "".join(self._texts) - - def apply_diff(self, diff): - """Apply the RCS diff DIFF to the current file content.""" - - ntexts = [] - ooff = 0 - diffs = msplit(diff) - i = 0 - while i < len(diffs): - admatch = self.ad_command.match(diffs[i]) - if not admatch: - raise MalformedDeltaException('Bad ed command') - i += 1 - sl = int(admatch.group(2)) - cn = int(admatch.group(3)) - if admatch.group(1) == 'd': # "d" - Delete command - sl -= 1 - if sl < ooff: - raise MalformedDeltaException('Deletion before last edit') - if sl > len(self._texts): - raise MalformedDeltaException('Deletion past file end') - if sl + cn > len(self._texts): - raise MalformedDeltaException('Deletion beyond file end') - ntexts += self._texts[ooff:sl] - ooff = sl + cn - else: # "a" - Add command - if sl < ooff: # Also catches same place - raise MalformedDeltaException('Insertion before last edit') - if sl > len(self._texts): - raise MalformedDeltaException('Insertion past file end') - ntexts += self._texts[ooff:sl] + diffs[i:i + cn] - ooff = sl - i += cn - self._texts = ntexts + self._texts[ooff:] - - def invert_diff(self, diff): - """Apply the RCS diff DIFF to the current file content and simultaneously - generate an RCS diff suitable for reverting the change.""" - - ntexts = [] - ooff = 0 - diffs = msplit(diff) - ndiffs = [] - adjust = 0 - i = 0 - while i < len(diffs): - admatch = self.ad_command.match(diffs[i]) - if not admatch: - raise MalformedDeltaException('Bad ed command') - i += 1 - sl = int(admatch.group(2)) - cn = int(admatch.group(3)) - if admatch.group(1) == 'd': # "d" - Delete command - sl -= 1 - if sl < ooff: - raise MalformedDeltaException('Deletion before last edit') - if sl > len(self._texts): - raise MalformedDeltaException('Deletion past file end') - if sl + cn > len(self._texts): - raise MalformedDeltaException('Deletion beyond file end') - # Handle substitution explicitly, as add must come after del - # (last add may end in no newline, so no command can follow). - if i < len(diffs): - amatch = self.a_command.match(diffs[i]) - else: - amatch = None - if amatch and int(amatch.group(1)) == sl + cn: - cn2 = int(amatch.group(2)) - i += 1 - ndiffs += ["d%d %d\na%d %d\n" % \ - (sl + 1 + adjust, cn2, sl + adjust + cn2, cn)] + \ - self._texts[sl:sl + cn] - ntexts += self._texts[ooff:sl] + diffs[i:i + cn2] - adjust += cn2 - cn - i += cn2 - else: - ndiffs += ["a%d %d\n" % (sl + adjust, cn)] + \ - self._texts[sl:sl + cn] - ntexts += self._texts[ooff:sl] - adjust -= cn - ooff = sl + cn - else: # "a" - Add command - if sl < ooff: # Also catches same place - raise MalformedDeltaException('Insertion before last edit') - if sl > len(self._texts): - raise MalformedDeltaException('Insertion past file end') - ndiffs += ["d%d %d\n" % (sl + 1 + adjust, cn)] - ntexts += self._texts[ooff:sl] + diffs[i:i + cn] - ooff = sl - adjust += cn - i += cn - self._texts = ntexts + self._texts[ooff:] - return "".join(ndiffs) - diff --git a/cvs2svn_lib/record_table.py b/cvs2svn_lib/record_table.py deleted file mode 100644 index 41ab84a..0000000 --- a/cvs2svn_lib/record_table.py +++ /dev/null @@ -1,399 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Classes to manage Databases of fixed-length records. - -The databases map small, non-negative integers to fixed-size records. -The records are written in index order to a disk file. Gaps in the -index sequence leave gaps in the data file, so for best space -efficiency the indexes of existing records should be approximately -continuous. - -To use a RecordTable, you need a class derived from Packer which can -serialize/deserialize your records into fixed-size strings. Deriving -classes have to specify how to pack records into strings and unpack -strings into records by overwriting the pack() and unpack() methods -respectively. - -Note that these classes keep track of gaps in the records that have -been written by filling them with packer.empty_value. If a record is -read which contains packer.empty_value, then a KeyError is raised.""" - - -import os -import types -import struct -import mmap - -from cvs2svn_lib.common import DB_OPEN_READ -from cvs2svn_lib.common import DB_OPEN_WRITE -from cvs2svn_lib.common import DB_OPEN_NEW -from cvs2svn_lib.log import Log - - -# A unique value that can be used to stand for "unset" without -# preventing the use of None. -_unset = object() - - -class Packer(object): - def __init__(self, record_len, empty_value=None): - self.record_len = record_len - if empty_value is None: - self.empty_value = '\0' * self.record_len - else: - assert type(empty_value) is types.StringType - assert len(empty_value) == self.record_len - self.empty_value = empty_value - - def pack(self, v): - """Pack record V into a string of length self.record_len.""" - - raise NotImplementedError() - - def unpack(self, s): - """Unpack string S into a record.""" - - raise NotImplementedError() - - -class StructPacker(Packer): - def __init__(self, format, empty_value=_unset): - self.format = format - if empty_value is not _unset: - empty_value = self.pack(empty_value) - else: - empty_value = None - - Packer.__init__(self, struct.calcsize(self.format), - empty_value=empty_value) - - def pack(self, v): - return struct.pack(self.format, v) - - def unpack(self, v): - return struct.unpack(self.format, v)[0] - - -class UnsignedIntegerPacker(StructPacker): - def __init__(self, empty_value=0): - StructPacker.__init__(self, '=I', empty_value) - - -class SignedIntegerPacker(StructPacker): - def __init__(self, empty_value=0): - StructPacker.__init__(self, '=i', empty_value) - - -class FileOffsetPacker(Packer): - """A packer suitable for file offsets. - - We store the 5 least significant bytes of the file offset. This is - enough bits to represent 1 TiB. Of course if the computer - doesn't have large file support, only the lowest 31 bits can be - nonzero, and the offsets are limited to 2 GiB.""" - - # Convert file offsets to 8-bit little-endian unsigned longs... - INDEX_FORMAT = '= self._max_memory_cache: - self.flush() - self._limit = max(self._limit, i + 1) - - def _get_packed_record(self, i): - try: - return self._cache[i][1] - except KeyError: - if not 0 <= i < self._limit_written: - raise KeyError(i) - self.f.seek(i * self._record_len) - s = self.f.read(self._record_len) - self._cache[i] = (False, s) - if len(self._cache) >= self._max_memory_cache: - self.flush() - - return s - - def close(self): - self.flush() - self._cache = None - self.f.close() - self.f = None - - -class MmapRecordTable(AbstractRecordTable): - GROWTH_INCREMENT = 65536 - - def __init__(self, filename, mode, packer): - AbstractRecordTable.__init__(self, filename, mode, packer) - if self.mode == DB_OPEN_NEW: - self.python_file = open(self.filename, 'wb+') - self.python_file.write('\0' * self.GROWTH_INCREMENT) - self.python_file.flush() - self._filesize = self.GROWTH_INCREMENT - self.f = mmap.mmap( - self.python_file.fileno(), self._filesize, - access=mmap.ACCESS_WRITE - ) - - # The index just beyond the last record ever written: - self._limit = 0 - elif self.mode == DB_OPEN_WRITE: - self.python_file = open(self.filename, 'rb+') - self._filesize = os.path.getsize(self.filename) - self.f = mmap.mmap( - self.python_file.fileno(), self._filesize, - access=mmap.ACCESS_WRITE - ) - - # The index just beyond the last record ever written: - self._limit = os.path.getsize(self.filename) // self._record_len - elif self.mode == DB_OPEN_READ: - self.python_file = open(self.filename, 'rb') - self._filesize = os.path.getsize(self.filename) - self.f = mmap.mmap( - self.python_file.fileno(), self._filesize, - access=mmap.ACCESS_READ - ) - - # The index just beyond the last record ever written: - self._limit = os.path.getsize(self.filename) // self._record_len - else: - raise RuntimeError('Invalid mode %r' % self.mode) - - def flush(self): - self.f.flush() - - def _set_packed_record(self, i, s): - if self.mode == DB_OPEN_READ: - raise RecordTableAccessError() - if i < 0: - raise KeyError() - if i >= self._limit: - # This write extends the range of valid indices. First check - # whether the file has to be enlarged: - new_size = (i + 1) * self._record_len - if new_size > self._filesize: - self._filesize = ( - (new_size + self.GROWTH_INCREMENT - 1) - // self.GROWTH_INCREMENT - * self.GROWTH_INCREMENT - ) - self.f.resize(self._filesize) - if i > self._limit: - # Pad up to the new record with empty_value: - self.f[self._limit * self._record_len:i * self._record_len] = \ - self.packer.empty_value * (i - self._limit) - self._limit = i + 1 - - self.f[i * self._record_len:(i + 1) * self._record_len] = s - - def _get_packed_record(self, i): - if not 0 <= i < self._limit: - raise KeyError(i) - return self.f[i * self._record_len:(i + 1) * self._record_len] - - def close(self): - self.flush() - self.f.close() - self.python_file.close() - - diff --git a/cvs2svn_lib/repository_delegate.py b/cvs2svn_lib/repository_delegate.py deleted file mode 100644 index 53c9b65..0000000 --- a/cvs2svn_lib/repository_delegate.py +++ /dev/null @@ -1,98 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains class RepositoryDelegate.""" - - -import os -import subprocess - -from cvs2svn_lib.common import CommandError -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.config import DUMPFILE -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.dumpfile_delegate import DumpfileDelegate - - -class RepositoryDelegate(DumpfileDelegate): - """Creates a new Subversion Repository. DumpfileDelegate does all - of the heavy lifting.""" - - def __init__(self, revision_reader, target): - self.target = target - - # Since the output of this run is a repository, not a dumpfile, - # the temporary dumpfiles we create should go in the tmpdir. But - # since we delete it ourselves, we don't want to use - # artifact_manager. - DumpfileDelegate.__init__( - self, revision_reader, Ctx().get_temp_filename(DUMPFILE) - ) - - self.dumpfile = open(self.dumpfile_path, 'w+b') - self.loader_pipe = subprocess.Popen( - [Ctx().svnadmin_executable, 'load', '-q', self.target], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - self.loader_pipe.stdout.close() - try: - self._write_dumpfile_header(self.loader_pipe.stdin) - except IOError: - raise FatalError( - 'svnadmin failed with the following output while ' - 'loading the dumpfile:\n%s' - % (self.loader_pipe.stderr.read(),) - ) - - def start_commit(self, revnum, revprops): - """Start a new commit.""" - - DumpfileDelegate.start_commit(self, revnum, revprops) - - def end_commit(self): - """Feed the revision stored in the dumpfile to the svnadmin load pipe.""" - - DumpfileDelegate.end_commit(self) - - self.dumpfile.seek(0) - while True: - data = self.dumpfile.read(128*1024) # Chunk size is arbitrary - if not data: - break - try: - self.loader_pipe.stdin.write(data) - except IOError: - raise FatalError("svnadmin failed with the following output " - "while loading the dumpfile:\n" - + self.loader_pipe.stderr.read()) - self.dumpfile.seek(0) - self.dumpfile.truncate() - - def finish(self): - """Clean up.""" - - self.dumpfile.close() - self.loader_pipe.stdin.close() - error_output = self.loader_pipe.stderr.read() - exit_status = self.loader_pipe.wait() - del self.loader_pipe - if exit_status: - raise CommandError('svnadmin load', exit_status, error_output) - os.remove(self.dumpfile_path) - - diff --git a/cvs2svn_lib/repository_mirror.py b/cvs2svn_lib/repository_mirror.py deleted file mode 100644 index 72e2ba1..0000000 --- a/cvs2svn_lib/repository_mirror.py +++ /dev/null @@ -1,897 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains the RepositoryMirror class and supporting classes. - -RepositoryMirror represents the skeleton of a versioned file tree with -multiple lines of development ('LODs'). It records the presence or -absence of files and directories, but not their contents. Given three -values (revnum, lod, cvs_path), it can tell you whether the specified -CVSPath existed on the specified LOD in the given revision number. -The file trees corresponding to the most recent revision can be -modified. - -The individual file trees are stored using immutable tree structures. -Each directory node is represented as a MirrorDirectory instance, -which is basically a map {cvs_path : node_id}, where cvs_path is a -CVSPath within the directory, and node_id is an integer ID that -uniquely identifies another directory node if that node is a -CVSDirectory, or None if that node is a CVSFile. If a directory node -is to be modified, then first a new node is created with a copy of the -original node's contents, then the copy is modified. A reference to -the copy also has to be stored in the parent node, meaning that the -parent node needs to be modified, and so on recursively to the root -node of the file tree. This data structure allows cheap deep copies, -which is useful for tagging and branching. - -The class must also be able to find the root directory node -corresponding to a particular (revnum, lod). This is done by keeping -an LODHistory instance for each LOD, which can determine the root -directory node ID for that LOD for any revnum. It does so by -recording changes to the root directory node ID only for revisions in -which it changed. Thus it stores two arrays, revnums (a list of the -revision numbers when the ID changed), and ids (a list of the -corresponding IDs). To find the ID for a particular revnum, first a -binary search is done in the revnums array to find the index of the -last change preceding revnum, then the corresponding ID is read from -the ids array. Since most revisions change only one LOD, this allows -storage of the history of potentially tens of thousands of LODs over -hundreds of thousands of revisions in an amount of space that scales -as O(numberOfLODs + numberOfRevisions), rather than O(numberOfLODs * -numberOfRevisions) as would be needed if the information were stored -in the equivalent of a 2D array. - -The internal operation of these classes is somewhat intricate, but the -interface attempts to hide the complexity, enforce the usage rules, -and allow efficient access. The most important facts to remember are -(1) that a directory node can be used for multiple purposes (for -multiple branches and for multiple revisions on a single branch), (2) -that only a node that has been created within the current revision is -allowed to be mutated, and (3) that the current revision can include -nodes carried over from prior revisions, which are immutable. - -This leads to a bewildering variety of MirrorDirectory classes. The -most important distinction is between OldMirrorDirectories and -CurrentMirrorDirectories. A single node can be represented multiple -ways in memory at the same time, depending on whether it was looked up -as part of the current revision or part of an old revision: - - MirrorDirectory -- the base class for all MirrorDirectory nodes. - This class allows lookup of subnodes and iteration over - subnodes. - - OldMirrorDirectory -- a MirrorDirectory that was looked up for an - old revision. These instances are immutable, as only the - current revision is allowed to be modified. - - CurrentMirrorDirectory -- a MirrorDirectory that was looked up for - the current revision. Such an instance is always logically - mutable, though mutating it might require the node to be - copied first. Such an instance might represent a node that - has already been copied during this revision and can therefore - be modified freely (such nodes implement - _WritableMirrorDirectoryMixin), or it might represent a node - that was carried over from an old revision and hasn't been - copied yet (such nodes implement - _ReadOnlyMirrorDirectoryMixin). If the latter, then the node - copies itself (and bubbles up the change) before allowing - itself to be modified. But the distinction is managed - internally; client classes should not have to worry about it. - - CurrentMirrorLODDirectory -- A CurrentMirrorDirectory representing - the root directory of a line of development in the current - revision. This class has two concrete subclasses, - _CurrentMirrorReadOnlyLODDirectory and - _CurrentMirrorWritableLODDirectory, depending on whether the - node has already been copied during this revision. - - - CurrentMirrorSubdirectory -- A CurrentMirrorDirectory representing - a subdirectory within a line of development's directory tree - in the current revision. This class has two concrete - subclasses, _CurrentMirrorReadOnlySubdirectory and - _CurrentMirrorWritableSubdirectory, depending on whether the - node has already been copied during this revision. - - DeletedCurrentMirrorDirectory -- a MirrorDirectory that has been - deleted. Such an instance is disabled so that it cannot - accidentally be used. - -While a revision is being processed, RepositoryMirror._new_nodes holds -every writable CurrentMirrorDirectory instance (i.e., every node that -has been created in the revision). Since these nodes are mutable, it -is important that there be exactly one instance associated with each -node; otherwise there would be problems keeping the instances -synchronized. These are written to the database by -RepositoryMirror.end_commit(). - -OldMirrorDirectory and read-only CurrentMirrorDirectory instances are -*not* cached; they are recreated whenever they are referenced. There -might be multiple instances referring to the same node. A read-only -CurrentMirrorDirectory instance is mutated in place into a writable -CurrentMirrorDirectory instance if it needs to be modified. - -FIXME: The rules for when a MirrorDirectory instance can continue to -be used vs. when it has to be read again (because it has been modified -indirectly and therefore copied) are confusing and error-prone. -Probably the semantics should be changed. - -""" - - -import bisect - -from cvs2svn_lib import config -from cvs2svn_lib.common import DB_OPEN_NEW -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.log import Log -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.cvs_file import CVSFile -from cvs2svn_lib.cvs_file import CVSDirectory -from cvs2svn_lib.key_generator import KeyGenerator -from cvs2svn_lib.artifact_manager import artifact_manager -from cvs2svn_lib.serializer import MarshalSerializer -from cvs2svn_lib.database import IndexedDatabase - - -class RepositoryMirrorError(Exception): - """An error related to the RepositoryMirror.""" - - pass - - -class LODExistsError(RepositoryMirrorError): - """The LOD already exists in the repository. - - Exception raised if an attempt is made to add an LOD to the - repository mirror and that LOD already exists in the youngest - revision of the repository.""" - - pass - - -class PathExistsError(RepositoryMirrorError): - """The path already exists in the repository. - - Exception raised if an attempt is made to add a path to the - repository mirror and that path already exists in the youngest - revision of the repository.""" - - pass - - -class DeletedNodeReusedError(RepositoryMirrorError): - """The MirrorDirectory has already been deleted and shouldn't be reused.""" - - pass - - -class CopyFromCurrentNodeError(RepositoryMirrorError): - """A CurrentMirrorDirectory cannot be copied to the current revision.""" - - pass - - -class MirrorDirectory(object): - """Represent a node within the RepositoryMirror. - - Instances of this class act like a map {CVSPath : MirrorDirectory}, - where CVSPath is an item within this directory (i.e., a file or - subdirectory within this directory). The value is either another - MirrorDirectory instance (for directories) or None (for files).""" - - def __init__(self, repo, id, entries): - # The RepositoryMirror containing this directory: - self.repo = repo - - # The id of this node: - self.id = id - - # The entries within this directory, stored as a map {CVSPath : - # node_id}. The node_ids are integers for CVSDirectories, None - # for CVSFiles: - self._entries = entries - - def __getitem__(self, cvs_path): - """Return the MirrorDirectory associated with the specified subnode. - - Return a MirrorDirectory instance if the subnode is a - CVSDirectory; None if it is a CVSFile. Raise KeyError if the - specified subnode does not exist.""" - - raise NotImplementedError() - - def __len__(self): - """Return the number of CVSPaths within this node.""" - - return len(self._entries) - - def __contains__(self, cvs_path): - """Return True iff CVS_PATH is contained in this node.""" - - return cvs_path in self._entries - - def __iter__(self): - """Iterate over the CVSPaths within this node.""" - - return self._entries.__iter__() - - def _format_entries(self): - """Format the entries map for output in subclasses' __repr__() methods.""" - - def format_item(key, value): - if value is None: - return str(key) - else: - return '%s -> %x' % (key, value,) - - items = self._entries.items() - items.sort() - return '{%s}' % (', '.join([format_item(*item) for item in items]),) - - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return '%s<%x>' % (self.__class__.__name__, self.id,) - - -class OldMirrorDirectory(MirrorDirectory): - """Represent a historical directory within the RepositoryMirror.""" - - def __getitem__(self, cvs_path): - id = self._entries[cvs_path] - if id is None: - # This represents a leaf node. - return None - else: - return OldMirrorDirectory(self.repo, id, self.repo._node_db[id]) - - def __repr__(self): - """For convenience only. The format is subject to change at any time.""" - - return '%s(%s)' % (self, self._format_entries(),) - - -class CurrentMirrorDirectory(MirrorDirectory): - """Represent a directory that currently exists in the RepositoryMirror.""" - - def __init__(self, repo, id, lod, cvs_path, entries): - MirrorDirectory.__init__(self, repo, id, entries) - self.lod = lod - self.cvs_path = cvs_path - - def __getitem__(self, cvs_path): - id = self._entries[cvs_path] - if id is None: - # This represents a leaf node. - return None - else: - try: - return self.repo._new_nodes[id] - except KeyError: - return _CurrentMirrorReadOnlySubdirectory( - self.repo, id, self.lod, cvs_path, self, - self.repo._node_db[id] - ) - - def __setitem__(self, cvs_path, node): - """Create or overwrite a subnode of this node. - - CVS_PATH is the path of the subnode. NODE will be the new value - of the node; for CVSDirectories it should be a MirrorDirectory - instance; for CVSFiles it should be None.""" - - if isinstance(node, DeletedCurrentMirrorDirectory): - raise DeletedNodeReusedError( - '%r has already been deleted and should not be reused' % (node,) - ) - elif isinstance(node, CurrentMirrorDirectory): - raise CopyFromCurrentNodeError( - '%r was created in the current node and cannot be copied' % (node,) - ) - else: - self._set_entry(cvs_path, node) - - def __delitem__(self, cvs_path): - """Remove the subnode of this node at CVS_PATH. - - If the node does not exist, then raise a KeyError.""" - - node = self[cvs_path] - self._del_entry(cvs_path) - if isinstance(node, _WritableMirrorDirectoryMixin): - node._mark_deleted() - - def mkdir(self, cvs_directory): - """Create an empty subdirectory of this node at CVS_PATH. - - Return the CurrentDirectory that was created.""" - - assert isinstance(cvs_directory, CVSDirectory) - if cvs_directory in self: - raise PathExistsError( - 'Attempt to create directory \'%s\' in %s in repository mirror ' - 'when it already exists.' - % (cvs_directory, self.lod,) - ) - - new_node = _CurrentMirrorWritableSubdirectory( - self.repo, self.repo._key_generator.gen_id(), self.lod, cvs_directory, - self, {} - ) - self._set_entry(cvs_directory, new_node) - self.repo._new_nodes[new_node.id] = new_node - return new_node - - def add_file(self, cvs_file): - """Create a file within this node at CVS_FILE.""" - - assert isinstance(cvs_file, CVSFile) - if cvs_file in self: - raise PathExistsError( - 'Attempt to create file \'%s\' in %s in repository mirror ' - 'when it already exists.' - % (cvs_file, self.lod,) - ) - - self._set_entry(cvs_file, None) - - def __repr__(self): - """For convenience only. The format is subject to change at any time.""" - - return '%s(%r, %r, %s)' % ( - self, self.lod, self.cvs_path, self._format_entries(), - ) - - -class DeletedCurrentMirrorDirectory(object): - """A MirrorDirectory that has been deleted. - - A MirrorDirectory that used to be a _WritableMirrorDirectoryMixin - but then was deleted. Such instances are turned into this class so - that nobody can accidentally mutate them again.""" - - pass - - -class _WritableMirrorDirectoryMixin: - """Mixin for MirrorDirectories that are already writable. - - A MirrorDirectory is writable if it has already been recreated - during the current revision.""" - - def _set_entry(self, cvs_path, node): - """Create or overwrite a subnode of this node, with no checks.""" - - if node is None: - self._entries[cvs_path] = None - else: - self._entries[cvs_path] = node.id - - def _del_entry(self, cvs_path): - """Remove the subnode of this node at CVS_PATH, with no checks.""" - - del self._entries[cvs_path] - - def _mark_deleted(self): - """Mark this object and any writable descendants as being deleted.""" - - self.__class__ = DeletedCurrentMirrorDirectory - - for (cvs_path, id) in self._entries.iteritems(): - if id in self.repo._new_nodes: - node = self[cvs_path] - if isinstance(node, _WritableMirrorDirectoryMixin): - # Mark deleted and recurse: - node._mark_deleted() - - -class _ReadOnlyMirrorDirectoryMixin: - """Mixin for a CurrentMirrorDirectory that hasn't yet been made writable.""" - - def _make_writable(self): - raise NotImplementedError() - - def _set_entry(self, cvs_path, node): - """Create or overwrite a subnode of this node, with no checks.""" - - self._make_writable() - self._set_entry(cvs_path, node) - - def _del_entry(self, cvs_path): - """Remove the subnode of this node at CVS_PATH, with no checks.""" - - self._make_writable() - self._del_entry(cvs_path) - - -class CurrentMirrorLODDirectory(CurrentMirrorDirectory): - """Represent an LOD's main directory in the mirror's current version.""" - - def __init__(self, repo, id, lod, entries): - CurrentMirrorDirectory.__init__( - self, repo, id, lod, lod.project.get_root_cvs_directory(), entries - ) - - def delete(self): - """Remove the directory represented by this object.""" - - lod_history = self.repo._get_lod_history(self.lod) - assert lod_history.exists() - lod_history.update(self.repo._youngest, None) - self._mark_deleted() - - -class _CurrentMirrorReadOnlyLODDirectory( - CurrentMirrorLODDirectory, _ReadOnlyMirrorDirectoryMixin - ): - """Represent an LOD's main directory in the mirror's current version.""" - - def _make_writable(self): - self.__class__ = _CurrentMirrorWritableLODDirectory - # Create a new ID: - self.id = self.repo._key_generator.gen_id() - self.repo._new_nodes[self.id] = self - self.repo._get_lod_history(self.lod).update(self.repo._youngest, self.id) - self._entries = self._entries.copy() - - -class _CurrentMirrorWritableLODDirectory( - CurrentMirrorLODDirectory, _WritableMirrorDirectoryMixin - ): - pass - - -class CurrentMirrorSubdirectory(CurrentMirrorDirectory): - """Represent a subdirectory in the mirror's current version.""" - - def __init__(self, repo, id, lod, cvs_path, parent_mirror_dir, entries): - CurrentMirrorDirectory.__init__(self, repo, id, lod, cvs_path, entries) - self.parent_mirror_dir = parent_mirror_dir - - def delete(self): - """Remove the directory represented by this object.""" - - del self.parent_mirror_dir[self.cvs_path] - - -class _CurrentMirrorReadOnlySubdirectory( - CurrentMirrorSubdirectory, _ReadOnlyMirrorDirectoryMixin - ): - """Represent a subdirectory in the mirror's current version.""" - - def _make_writable(self): - self.__class__ = _CurrentMirrorWritableSubdirectory - # Create a new ID: - self.id = self.repo._key_generator.gen_id() - self.repo._new_nodes[self.id] = self - self.parent_mirror_dir._set_entry(self.cvs_path, self) - self._entries = self._entries.copy() - - -class _CurrentMirrorWritableSubdirectory( - CurrentMirrorSubdirectory, _WritableMirrorDirectoryMixin - ): - pass - - -class LODHistory(object): - """The history of root nodes for a line of development. - - Members: - - _mirror -- (RepositoryMirror) the RepositoryMirror that manages - this LODHistory. - - lod -- (LineOfDevelopment) the LOD described by this LODHistory. - - revnums -- (list of int) the revision numbers in which the id - changed, in numerical order. - - ids -- (list of (int or None)) the ID of the node describing the - root of this LOD starting at the corresponding revision - number, or None if the LOD did not exist in that revision. - - To find the root id for a given revision number, a binary search is - done within REVNUMS to find the index of the most recent revision at - the time of REVNUM, then that index is used to read the id out of - IDS. - - A sentry is written at the zeroth index of both arrays to describe - the initial situation, namely, that the LOD doesn't exist in - revision r0.""" - - __slots__ = ['_mirror', 'lod', 'revnums', 'ids'] - - def __init__(self, mirror, lod): - self._mirror = mirror - self.lod = lod - self.revnums = [0] - self.ids = [None] - - def get_id(self, revnum): - """Get the ID of the root path for this LOD in REVNUM. - - Raise KeyError if this LOD didn't exist in REVNUM.""" - - index = bisect.bisect_right(self.revnums, revnum) - 1 - id = self.ids[index] - - if id is None: - raise KeyError() - - return id - - def get_current_id(self): - """Get the ID of the root path for this LOD in the current revision. - - Raise KeyError if this LOD doesn't currently exist.""" - - id = self.ids[-1] - - if id is None: - raise KeyError() - - return id - - def exists(self): - """Return True iff LOD exists in the current revision.""" - - return self.ids[-1] is not None - - def update(self, revnum, id): - """Indicate that the root node of this LOD changed to ID at REVNUM. - - REVNUM is a revision number that must be the same as that of the - previous recorded change (in which case the previous change is - overwritten) or later (in which the new change is appended). - - ID can be a node ID, or it can be None to indicate that this LOD - ceased to exist in REVNUM.""" - - if revnum < self.revnums[-1]: - raise KeyError() - elif revnum == self.revnums[-1]: - # This is an attempt to overwrite an entry that was already - # updated during this revision. Don't allow the replacement - # None -> None or allow one new id to be replaced with another: - old_id = self.ids[-1] - if old_id is None and id is None: - raise InternalError( - 'ID changed from None -> None for %s, r%d' % (self.lod, revnum,) - ) - elif (old_id is not None and id is not None - and old_id in self._mirror._new_nodes): - raise InternalError( - 'ID changed from %x -> %x for %s, r%d' - % (old_id, id, self.lod, revnum,) - ) - self.ids[-1] = id - else: - self.revnums.append(revnum) - self.ids.append(id) - - -class _NodeDatabase(object): - """A database storing all of the directory nodes. - - The nodes are written in groups every time write_new_nodes() is - called. To the database is written a dictionary {node_id : - [(cvs_path.id, node_id),...]}, where the keys are the node_ids of - the new nodes. When a node is read, its whole group is read and - cached under the assumption that the other nodes in the group are - likely to be needed soon. The cache is retained across revisions - and cleared when _cache_max_size is exceeded. - - The dictionaries for nodes that have been read from the database - during the current revision are cached by node_id in the _cache - member variable. The corresponding dictionaries are *not* copied - when read. To avoid cross-talk between distinct MirrorDirectory - instances that have the same node_id, users of these dictionaries - have to copy them before modification.""" - - # How many entries should be allowed in the cache for each - # CVSDirectory in the repository. (This number is very roughly the - # number of complete lines of development that can be stored in the - # cache at one time.) - CACHE_SIZE_MULTIPLIER = 5 - - # But the cache will never be limited to less than this number: - MIN_CACHE_LIMIT = 5000 - - def __init__(self): - self.cvs_file_db = Ctx()._cvs_file_db - self.db = IndexedDatabase( - artifact_manager.get_temp_file(config.MIRROR_NODES_STORE), - artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE), - DB_OPEN_NEW, serializer=MarshalSerializer(), - ) - - # A list of the maximum node_id stored by each call to - # write_new_nodes(): - self._max_node_ids = [0] - - # A map {node_id : {cvs_path : node_id}}: - self._cache = {} - - # The number of directories in the repository: - num_dirs = len([ - cvs_path - for cvs_path in self.cvs_file_db.itervalues() - if isinstance(cvs_path, CVSDirectory) - ]) - - self._cache_max_size = max( - int(self.CACHE_SIZE_MULTIPLIER * num_dirs), - self.MIN_CACHE_LIMIT, - ) - - def _load(self, items): - retval = {} - for (id, value) in items: - retval[self.cvs_file_db.get_file(id)] = value - return retval - - def _dump(self, node): - return [ - (cvs_path.id, value) - for (cvs_path, value) in node.iteritems() - ] - - def _determine_index(self, id): - """Return the index of the record holding the node with ID.""" - - return bisect.bisect_left(self._max_node_ids, id) - - def __getitem__(self, id): - try: - items = self._cache[id] - except KeyError: - index = self._determine_index(id) - for (node_id, items) in self.db[index].items(): - self._cache[node_id] = self._load(items) - items = self._cache[id] - - return items - - def write_new_nodes(self, nodes): - """Write NODES to the database. - - NODES is an iterable of writable CurrentMirrorDirectory instances.""" - - if len(self._cache) > self._cache_max_size: - # The size of the cache has exceeded the threshold. Discard the - # old cache values (but still store the new nodes into the - # cache): - Log().debug('Clearing node cache') - self._cache.clear() - - data = {} - max_node_id = 0 - for node in nodes: - max_node_id = max(max_node_id, node.id) - data[node.id] = self._dump(node._entries) - self._cache[node.id] = node._entries - - self.db[len(self._max_node_ids)] = data - - if max_node_id == 0: - # Rewrite last value: - self._max_node_ids.append(self._max_node_ids[-1]) - else: - self._max_node_ids.append(max_node_id) - - def close(self): - self._cache.clear() - self.db.close() - self.db = None - - -class RepositoryMirror: - """Mirror a repository and its history. - - Mirror a repository as it is constructed, one revision at a time. - For each LineOfDevelopment we store a skeleton of the directory - structure within that LOD for each revnum in which it changed. - - For each LOD that has been seen so far, an LODHistory instance is - stored in self._lod_histories. An LODHistory keeps track of each - revnum in which files were added to or deleted from that LOD, as - well as the node id of the root of the node tree describing the LOD - contents at that revision. - - The LOD trees themselves are stored in the _node_db database, which - maps node ids to nodes. A node is a map from CVSPath to ids of the - corresponding subnodes. The _node_db is stored on disk and each - access is expensive. - - The _node_db database only holds the nodes for old revisions. The - revision that is being constructed is kept in memory in the - _new_nodes map, which is cheap to access. - - You must invoke start_commit() before each commit and end_commit() - afterwards.""" - - def register_artifacts(self, which_pass): - """Register the artifacts that will be needed for this object.""" - - artifact_manager.register_temp_file( - config.MIRROR_NODES_INDEX_TABLE, which_pass - ) - artifact_manager.register_temp_file( - config.MIRROR_NODES_STORE, which_pass - ) - - def open(self): - """Set up the RepositoryMirror and prepare it for commits.""" - - self._key_generator = KeyGenerator() - - # A map from LOD to LODHistory instance for all LODs that have - # been referenced so far: - self._lod_histories = {} - - # This corresponds to the 'nodes' table in a Subversion fs. (We - # don't need a 'representations' or 'strings' table because we - # only track file existence, not file contents.) - self._node_db = _NodeDatabase() - - # Start at revision 0 without a root node. - self._youngest = 0 - - def start_commit(self, revnum): - """Start a new commit.""" - - assert revnum > self._youngest - self._youngest = revnum - - # A map {node_id : _WritableMirrorDirectoryMixin}. - self._new_nodes = {} - - def end_commit(self): - """Called at the end of each commit. - - This method copies the newly created nodes to the on-disk nodes - db.""" - - # Copy the new nodes to the _node_db - self._node_db.write_new_nodes([ - node - for node in self._new_nodes.values() - if not isinstance(node, DeletedCurrentMirrorDirectory) - ]) - - del self._new_nodes - - def _get_lod_history(self, lod): - """Return the LODHistory instance describing LOD. - - Create a new (empty) LODHistory if it doesn't yet exist.""" - - try: - return self._lod_histories[lod] - except KeyError: - lod_history = LODHistory(self, lod) - self._lod_histories[lod] = lod_history - return lod_history - - def get_old_lod_directory(self, lod, revnum): - """Return the directory for the root path of LOD at revision REVNUM. - - Return an instance of MirrorDirectory if the path exists; - otherwise, raise KeyError.""" - - lod_history = self._get_lod_history(lod) - id = lod_history.get_id(revnum) - return OldMirrorDirectory(self, id, self._node_db[id]) - - def get_old_path(self, cvs_path, lod, revnum): - """Return the node for CVS_PATH from LOD at REVNUM. - - If CVS_PATH is a CVSDirectory, then return an instance of - OldMirrorDirectory. If CVS_PATH is a CVSFile, return None. - - If CVS_PATH does not exist in the specified LOD and REVNUM, raise - KeyError.""" - - node = self.get_old_lod_directory(lod, revnum) - - for sub_path in cvs_path.get_ancestry()[1:]: - node = node[sub_path] - - return node - - def get_current_lod_directory(self, lod): - """Return the directory for the root path of LOD in the current revision. - - Return an instance of CurrentMirrorDirectory. Raise KeyError if - the path doesn't already exist.""" - - lod_history = self._get_lod_history(lod) - id = lod_history.get_current_id() - try: - return self._new_nodes[id] - except KeyError: - return _CurrentMirrorReadOnlyLODDirectory( - self, id, lod, self._node_db[id] - ) - - def get_current_path(self, cvs_path, lod): - """Return the node for CVS_PATH from LOD in the current revision. - - If CVS_PATH is a CVSDirectory, then return an instance of - CurrentMirrorDirectory. If CVS_PATH is a CVSFile, return None. - - If CVS_PATH does not exist in the current revision of the - specified LOD, raise KeyError.""" - - node = self.get_current_lod_directory(lod) - - for sub_path in cvs_path.get_ancestry()[1:]: - node = node[sub_path] - - return node - - def add_lod(self, lod): - """Create a new LOD in this repository. - - Return the CurrentMirrorDirectory that was created. If the LOD - already exists, raise LODExistsError.""" - - lod_history = self._get_lod_history(lod) - if lod_history.exists(): - raise LODExistsError( - 'Attempt to create %s in repository mirror when it already exists.' - % (lod,) - ) - new_node = _CurrentMirrorWritableLODDirectory( - self, self._key_generator.gen_id(), lod, {} - ) - lod_history.update(self._youngest, new_node.id) - self._new_nodes[new_node.id] = new_node - return new_node - - def copy_lod(self, src_lod, dest_lod, src_revnum): - """Copy all of SRC_LOD at SRC_REVNUM to DST_LOD. - - In the youngest revision of the repository, the destination LOD - *must not* already exist. - - Return the new node at DEST_LOD, as a CurrentMirrorDirectory.""" - - # Get the node of our src_path - src_node = self.get_old_lod_directory(src_lod, src_revnum) - - dest_lod_history = self._get_lod_history(dest_lod) - if dest_lod_history.exists(): - raise LODExistsError( - 'Attempt to copy to %s in repository mirror when it already exists.' - % (dest_lod,) - ) - - dest_lod_history.update(self._youngest, src_node.id) - - # Return src_node, except packaged up as a CurrentMirrorDirectory: - return self.get_current_lod_directory(dest_lod) - - def close(self): - """Free resources and close databases.""" - - self._lod_histories = None - self._node_db.close() - self._node_db = None - - diff --git a/cvs2svn_lib/revision_manager.py b/cvs2svn_lib/revision_manager.py deleted file mode 100644 index 8af7c74..0000000 --- a/cvs2svn_lib/revision_manager.py +++ /dev/null @@ -1,189 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module describes the interface to the CVS repository.""" - - -class RevisionRecorder: - """An object that can record text and deltas from CVS files.""" - - def __init__(self): - """Initialize the RevisionRecorder. - - Please note that a RevisionRecorder is instantiated in every - program run, even if the data-collection pass will not be - executed. (This is to allow it to register the artifacts that it - produces.) Therefore, the __init__() method should not do much, - and more substantial preparation for use (like actually creating - the artifacts) should be done in start().""" - - pass - - def register_artifacts(self, which_pass): - """Register artifacts that will be needed during data recording. - - WHICH_PASS is the pass that will call our callbacks, so it should - be used to do the registering (e.g., call - WHICH_PASS.register_temp_file() and/or - WHICH_PASS.register_temp_file_needed()).""" - - pass - - def start(self): - """Data will soon start being collected. - - Any non-idempotent initialization should be done here.""" - - pass - - def start_file(self, cvs_file_items): - """Prepare to receive data for the file with the specified CVS_FILE_ITEMS. - - CVS_FILE_ITEMS is an instance of CVSFileItems describing the file - dependency topology right after the file tree was parsed out of - the RCS file. (I.e., it reflects the original CVS dependency - structure.) Please note that the CVSFileItems instance will be - changed later.""" - - pass - - def record_text(self, cvs_rev, log, text): - """Record information about a revision and optionally return a token. - - CVS_REV is a CVSRevision instance describing a revision that has - log message LOG and text TEXT (as retrieved from the RCS file). - (TEXT is full text for the HEAD revision, and deltas for other - revisions.)""" - - raise NotImplementedError() - - def finish_file(self, cvs_file_items): - """The current file is finished; finish and clean up. - - CVS_FILE_ITEMS is a CVSFileItems instance describing the file's - items at the end of processing of the RCS file in CollectRevsPass. - It may be modified relative to the CVS_FILE_ITEMS instance passed - to the corresponding start_file() call (revisions might be - deleted, topology changed, etc).""" - - pass - - def finish(self): - """All recording is done; clean up.""" - - pass - - -class NullRevisionRecorder(RevisionRecorder): - """A do-nothing variety of RevisionRecorder.""" - - def record_text(self, cvs_rev, log, text): - return None - - -class RevisionExcluder: - """An interface for informing a RevisionReader about excluded revisions. - - Currently, revisions can be excluded via the --exclude option and - various fixups for CVS peculiarities. This interface can be used to - inform the associated RevisionReader about CVSItems that are being - excluded. (The recorder might use that information to free some - temporary data or adjust its expectations about which revisions will - later be read.)""" - - def __init__(self): - """Initialize the RevisionExcluder. - - Please note that a RevisionExcluder is instantiated in every - program run, even if the branch-exclusion pass will not be - executed. (This is to allow its register_artifacts() method to be - called.) Therefore, the __init__() method should not do much, and - more substantial preparation for use (like actually creating the - artifacts) should be done in start().""" - - pass - - def register_artifacts(self, which_pass): - """Register artifacts that will be needed during branch exclusion. - - WHICH_PASS is the pass that will call our callbacks, so it should - be used to do the registering (e.g., call - WHICH_PASS.register_temp_file() and/or - WHICH_PASS.register_temp_file_needed()).""" - - pass - - def start(self): - """Prepare to handle branch exclusions.""" - - pass - - def process_file(self, cvs_file_items): - """Called for files whose trees were modified in FilterSymbolsPass. - - This callback is called once for each CVSFile whose topology was - modified in FilterSymbolsPass.""" - - raise NotImplementedError() - - def finish(self): - """Called after all branch exclusions for all files are done.""" - - pass - - -class NullRevisionExcluder(RevisionExcluder): - """A do-nothing variety of RevisionExcluder.""" - - def process_file(self, cvs_file_items): - pass - - -class RevisionReader(object): - """An object that can read the contents of CVSRevisions.""" - - def register_artifacts(self, which_pass): - """Register artifacts that will be needed during branch exclusion. - - WHICH_PASS is the pass that will call our callbacks, so it should - be used to do the registering (e.g., call - WHICH_PASS.register_temp_file() and/or - WHICH_PASS.register_temp_file_needed()).""" - - pass - - def start(self): - """Prepare for calls to get_content_stream.""" - - pass - - def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False): - """Return a file-like object from which the contents of CVS_REV - can be read. - - CVS_REV is a CVSRevision. If SUPPRESS_KEYWORD_SUBSTITUTION is - True, then suppress the substitution of RCS/CVS keywords in the - output.""" - - raise NotImplementedError - - def finish(self): - """Inform the reader that all calls to get_content_stream are done. - Start may be called again at a later point.""" - - pass - - diff --git a/cvs2svn_lib/run_options.py b/cvs2svn_lib/run_options.py deleted file mode 100644 index 27d2ea6..0000000 --- a/cvs2svn_lib/run_options.py +++ /dev/null @@ -1,1035 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes to set common cvs2xxx run options.""" - -import sys -import re -import optparse -from optparse import OptionGroup -import time - -from cvs2svn_lib.version import VERSION -from cvs2svn_lib import config -from cvs2svn_lib.common import warning_prefix -from cvs2svn_lib.common import error_prefix -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import CVSTextDecoder -from cvs2svn_lib.log import Log -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.man_writer import ManOption -from cvs2svn_lib.pass_manager import InvalidPassError -from cvs2svn_lib.symbol_strategy import AllBranchRule -from cvs2svn_lib.symbol_strategy import AllTagRule -from cvs2svn_lib.symbol_strategy import BranchIfCommitsRule -from cvs2svn_lib.symbol_strategy import ExcludeRegexpStrategyRule -from cvs2svn_lib.symbol_strategy import ForceBranchRegexpStrategyRule -from cvs2svn_lib.symbol_strategy import ForceTagRegexpStrategyRule -from cvs2svn_lib.symbol_strategy import ExcludeTrivialImportBranchRule -from cvs2svn_lib.symbol_strategy import HeuristicStrategyRule -from cvs2svn_lib.symbol_strategy import UnambiguousUsageRule -from cvs2svn_lib.symbol_strategy import HeuristicPreferredParentRule -from cvs2svn_lib.symbol_strategy import SymbolHintsFileRule -from cvs2svn_lib.symbol_transform import ReplaceSubstringsSymbolTransform -from cvs2svn_lib.symbol_transform import RegexpSymbolTransform -from cvs2svn_lib.symbol_transform import NormalizePathsSymbolTransform -from cvs2svn_lib.property_setters import AutoPropsPropertySetter -from cvs2svn_lib.property_setters import CVSBinaryFileDefaultMimeTypeSetter -from cvs2svn_lib.property_setters import CVSBinaryFileEOLStyleSetter -from cvs2svn_lib.property_setters import CVSRevisionNumberSetter -from cvs2svn_lib.property_setters import DefaultEOLStyleSetter -from cvs2svn_lib.property_setters import EOLStyleFromMimeTypeSetter -from cvs2svn_lib.property_setters import ExecutablePropertySetter -from cvs2svn_lib.property_setters import KeywordsPropertySetter -from cvs2svn_lib.property_setters import MimeMapper -from cvs2svn_lib.property_setters import SVNBinaryFileKeywordsPropertySetter - - -usage = """\ -Usage: %prog --options OPTIONFILE - %prog [OPTION...] OUTPUT-OPTION CVS-REPOS-PATH""" - -description="""\ -Convert a CVS repository into a Subversion repository, including history. -""" - -authors = u"""\ -Main authors are: -.br -C. Michael Pilato -.br -Greg Stein -.br -Branko \u010cibej -.br -Blair Zajac -.br -Max Bowsher -.br -Brian Fitzpatrick -.br -Tobias Ringstr\u00f6m -.br -Karl Fogel -.br -Erik H\u00fclsmann -.br -David Summers -.br -Michael Haggerty -.PP -Manpage was written for the Debian GNU/Linux system by -Laszlo 'GCS' Boszormenyi (but may be used by others). -""" - - -class IncompatibleOption(ManOption): - """A ManOption that is incompatible with the --options option. - - Record that the option was used so that error checking can later be - done.""" - - def __init__(self, *args, **kw): - ManOption.__init__(self, *args, **kw) - - def take_action(self, action, dest, opt, value, values, parser): - oio = parser.values.options_incompatible_options - if opt not in oio: - oio.append(opt) - return ManOption.take_action( - self, action, dest, opt, value, values, parser - ) - - -class ContextOption(ManOption): - """A ManOption that stores its value to Ctx.""" - - def __init__(self, *args, **kw): - if kw.get('action') not in self.STORE_ACTIONS: - raise ValueError('Invalid action: %s' % (kw['action'],)) - - self.__compatible_with_option = kw.pop('compatible_with_option', False) - self.__action = kw.pop('action') - try: - self.__dest = kw.pop('dest') - except KeyError: - opt = args[0] - if not opt.startswith('--'): - raise ValueError - self.__dest = opt[2:].replace('-', '_') - if 'const' in kw: - self.__const = kw.pop('const') - - kw['action'] = 'callback' - kw['callback'] = self.__callback - - ManOption.__init__(self, *args, **kw) - - def __callback(self, option, opt_str, value, parser): - if not self.__compatible_with_option: - oio = parser.values.options_incompatible_options - if opt_str not in oio: - oio.append(opt_str) - - action = self.__action - dest = self.__dest - - if action == "store": - setattr(Ctx(), dest, value) - elif action == "store_const": - setattr(Ctx(), dest, self.__const) - elif action == "store_true": - setattr(Ctx(), dest, True) - elif action == "store_false": - setattr(Ctx(), dest, False) - elif action == "append": - getattr(Ctx(), dest).append(value) - elif action == "count": - setattr(Ctx(), dest, getattr(Ctx(), dest, 0) + 1) - else: - raise RuntimeError("unknown action %r" % self.__action) - - return 1 - - -class IncompatibleOptionsException(FatalError): - pass - - -# Options that are not allowed to be used with --trunk-only: -SYMBOL_OPTIONS = [ - '--symbol-transform', - '--symbol-hints', - '--force-branch', - '--force-tag', - '--exclude', - '--keep-trivial-imports', - '--symbol-default', - '--no-cross-branch-commits', - ] - -class SymbolOptionsWithTrunkOnlyException(IncompatibleOptionsException): - def __init__(self): - IncompatibleOptionsException.__init__( - self, - 'The following symbol-related options cannot be used together\n' - 'with --trunk-only:\n' - ' %s' - % ('\n '.join(SYMBOL_OPTIONS),) - ) - - -def not_both(opt1val, opt1name, opt2val, opt2name): - """Raise an exception if both opt1val and opt2val are set.""" - if opt1val and opt2val: - raise IncompatibleOptionsException( - "cannot pass both '%s' and '%s'." % (opt1name, opt2name,) - ) - - -class RunOptions(object): - """A place to store meta-options that are used to start the conversion.""" - - def __init__(self, progname, cmd_args, pass_manager): - """Process the command-line options, storing run options to SELF. - - PROGNAME is the name of the program, used in the usage string. - CMD_ARGS is the list of command-line arguments passed to the - program. PASS_MANAGER is an instance of PassManager, needed to - help process the -p and --help-passes options.""" - - self.progname = progname - self.cmd_args = cmd_args - self.pass_manager = pass_manager - self.start_pass = 1 - self.end_pass = self.pass_manager.num_passes - self.profiling = False - - self.projects = [] - - # A list of one list of SymbolStrategyRules for each project: - self.project_symbol_strategy_rules = [] - - parser = self.parser = optparse.OptionParser( - usage=usage, - description=self.get_description(), - add_help_option=False, - ) - # A place to record any options used that are incompatible with - # --options: - parser.set_default('options_incompatible_options', []) - - # Populate the options parser with the options, one group at a - # time: - parser.add_option_group(self._get_options_file_options_group()) - parser.add_option_group(self._get_output_options_group()) - parser.add_option_group(self._get_conversion_options_group()) - parser.add_option_group(self._get_symbol_handling_options_group()) - parser.add_option_group(self._get_subversion_properties_options_group()) - parser.add_option_group(self._get_extraction_options_group()) - parser.add_option_group(self._get_environment_options_group()) - parser.add_option_group(self._get_partial_conversion_options_group()) - parser.add_option_group(self._get_information_options_group()) - - (self.options, self.args) = parser.parse_args(args=self.cmd_args) - - # Now the log level has been set; log the time when the run started: - Log().verbose( - time.strftime( - 'Conversion start time: %Y-%m-%d %I:%M:%S %Z', - time.localtime(Log().start_time) - ) - ) - - if self.options.options_file_found: - # Check that no options that are incompatible with --options - # were used: - self.verify_option_compatibility() - else: - # --options was not specified. So do the main initialization - # based on other command-line options: - self.process_options() - - # Check for problems with the options: - self.check_options() - - def get_description(self): - return description - - def _get_options_file_options_group(self): - group = OptionGroup( - self.parser, 'Configuration via options file' - ) - self.parser.set_default('options_file_found', False) - group.add_option(ManOption( - '--options', type='string', - action='callback', callback=self.callback_options, - help=( - 'read the conversion options from PATH. This ' - 'method allows more flexibility than using ' - 'command-line options. See documentation for info' - ), - man_help=( - 'Read the conversion options from \\fIpath\\fR instead of from ' - 'the command line. This option allows far more conversion ' - 'flexibility than can be achieved using the command-line alone. ' - 'See the documentation for more information. Only the following ' - 'command-line options are allowed in combination with ' - '\\fB--options\\fR: \\fB-h\\fR/\\fB--help\\fR, ' - '\\fB--help-passes\\fR, \\fB--version\\fR, ' - '\\fB-v\\fR/\\fB--verbose\\fR, \\fB-q\\fR/\\fB--quiet\\fR, ' - '\\fB-p\\fR/\\fB--pass\\fR/\\fB--passes\\fR, \\fB--dry-run\\fR, ' - '\\fB--profile\\fR, \\fB--sort\\fR, \\fB--trunk-only\\fR, ' - '\\fB--encoding\\fR, and \\fB--fallback-encoding\\fR. ' - 'Options are processed in the order specified on the command ' - 'line.' - ), - metavar='PATH', - )) - return group - - def _get_output_options_group(self): - group = OptionGroup(self.parser, 'Output options') - return group - - def _get_conversion_options_group(self): - group = OptionGroup(self.parser, 'Conversion options') - group.add_option(ContextOption( - '--trunk-only', - action='store_true', - compatible_with_option=True, - help='convert only trunk commits, not tags nor branches', - man_help=( - 'Convert only trunk commits, not tags nor branches.' - ), - )) - group.add_option(ManOption( - '--encoding', type='string', - action='callback', callback=self.callback_encoding, - help=( - 'encoding for paths and log messages in CVS repos. ' - 'If option is specified multiple times, encoders ' - 'are tried in order until one succeeds. See ' - 'http://docs.python.org/lib/standard-encodings.html ' - 'for a list of standard Python encodings.' - ), - man_help=( - 'Use \\fIencoding\\fR as the encoding for filenames, log ' - 'messages, and author names in the CVS repos. This option ' - 'may be specified multiple times, in which case the encodings ' - 'are tried in order until one succeeds. Default: ascii. See ' - 'http://docs.python.org/lib/standard-encodings.html for a list ' - 'of other standard encodings.' - ), - metavar='ENC', - )) - group.add_option(ManOption( - '--fallback-encoding', type='string', - action='callback', callback=self.callback_fallback_encoding, - help='If all --encodings fail, use lossy encoding with ENC', - man_help=( - 'If none of the encodings specified with \\fB--encoding\\fR ' - 'succeed in decoding an author name or log message, then fall ' - 'back to using \\fIencoding\\fR in lossy \'replace\' mode. ' - 'Use of this option may cause information to be lost, but at ' - 'least it allows the conversion to run to completion. This ' - 'option only affects the encoding of log messages and author ' - 'names; there is no fallback encoding for filenames. (By ' - 'using an \\fB--options\\fR file, it is possible to specify ' - 'a fallback encoding for filenames.) Default: disabled.' - ), - metavar='ENC', - )) - group.add_option(ContextOption( - '--retain-conflicting-attic-files', - action='store_true', - help=( - 'if a file appears both in and out of ' - 'the CVS Attic, then leave the attic version in a ' - 'SVN directory called "Attic"' - ), - man_help=( - 'If a file appears both inside an outside of the CVS attic, ' - 'retain the attic version in an SVN subdirectory called ' - '\'Attic\'. (Normally this situation is treated as a fatal ' - 'error.)' - ), - )) - - return group - - def _get_symbol_handling_options_group(self): - group = OptionGroup(self.parser, 'Symbol handling') - self.parser.set_default('symbol_transforms', []) - group.add_option(IncompatibleOption( - '--symbol-transform', type='string', - action='callback', callback=self.callback_symbol_transform, - help=( - 'transform symbol names from P to S, where P and S ' - 'use Python regexp and reference syntax ' - 'respectively. P must match the whole symbol name' - ), - man_help=( - 'Transform RCS/CVS symbol names before entering them into ' - 'Subversion. \\fIpattern\\fR is a Python regexp pattern that ' - 'is matches against the entire symbol name; \\fIreplacement\\fR ' - 'is a replacement using Python\'s regexp reference syntax. ' - 'You may specify any number of these options; they will be ' - 'applied in the order given on the command line.' - ), - metavar='P:S', - )) - self.parser.set_default('symbol_strategy_rules', []) - group.add_option(IncompatibleOption( - '--symbol-hints', type='string', - action='callback', callback=self.callback_symbol_hints, - help='read symbol conversion hints from PATH', - man_help=( - 'Read symbol conversion hints from \\fIpath\\fR. The format of ' - '\\fIpath\\fR is the same as the format output by ' - '\\fB--write-symbol-info\\fR, namely a text file with four ' - 'whitespace-separated columns: \\fIproject-id\\fR, ' - '\\fIsymbol\\fR, \\fIconversion\\fR, and ' - '\\fIparent-lod-name\\fR. \\fIproject-id\\fR is the numerical ' - 'ID of the project to which the symbol belongs, counting from ' - '0. \\fIproject-id\\fR can be set to \'.\' if ' - 'project-specificity is not needed. \\fIsymbol-name\\fR is the ' - 'name of the symbol being specified. \\fIconversion\\fR ' - 'specifies how the symbol should be converted, and can be one ' - 'of the values \'branch\', \'tag\', or \'exclude\'. If ' - '\\fIconversion\\fR is \'.\', then this rule does not affect ' - 'how the symbol is converted. \\fIparent-lod-name\\fR is the ' - 'name of the symbol from which this symbol should sprout, or ' - '\'.trunk.\' if the symbol should sprout from trunk. If ' - '\\fIparent-lod-name\\fR is omitted or \'.\', then this rule ' - 'does not affect the preferred parent of this symbol. The file ' - 'may contain blank lines or comment lines (lines whose first ' - 'non-whitespace character is \'#\').' - ), - metavar='PATH', - )) - self.parser.set_default('symbol_default', 'heuristic') - group.add_option(IncompatibleOption( - '--symbol-default', type='choice', - choices=['heuristic', 'strict', 'branch', 'tag'], - action='store', - help=( - 'specify how ambiguous symbols are converted. ' - 'OPT is "heuristic" (default), "strict", "branch", ' - 'or "tag"' - ), - man_help=( - 'Specify how to convert ambiguous symbols (those that appear in ' - 'the CVS archive as both branches and tags). \\fIopt\\fR must ' - 'be \'heuristic\' (decide how to treat each ambiguous symbol ' - 'based on whether it was used more often as a branch/tag in ' - 'CVS), \'strict\' (no default; every ambiguous symbol has to be ' - 'resolved manually using \\fB--force-branch\\fR, ' - '\\fB--force-tag\\fR, or \\fB--exclude\\fR), \'branch\' (treat ' - 'every ambiguous symbol as a branch), or \'tag\' (treat every ' - 'ambiguous symbol as a tag). The default is \'heuristic\'.' - ), - metavar='OPT', - )) - group.add_option(IncompatibleOption( - '--force-branch', type='string', - action='callback', callback=self.callback_force_branch, - help='force symbols matching REGEXP to be branches', - man_help=( - 'Force symbols whose names match \\fIregexp\\fR to be branches. ' - '\\fIregexp\\fR must match the whole symbol name.' - ), - metavar='REGEXP', - )) - group.add_option(IncompatibleOption( - '--force-tag', type='string', - action='callback', callback=self.callback_force_tag, - help='force symbols matching REGEXP to be tags', - man_help=( - 'Force symbols whose names match \\fIregexp\\fR to be tags. ' - '\\fIregexp\\fR must match the whole symbol name.' - ), - metavar='REGEXP', - )) - group.add_option(IncompatibleOption( - '--exclude', type='string', - action='callback', callback=self.callback_exclude, - help='exclude branches and tags matching REGEXP', - man_help=( - 'Exclude branches and tags whose names match \\fIregexp\\fR ' - 'from the conversion. \\fIregexp\\fR must match the whole ' - 'symbol name.' - ), - metavar='REGEXP', - )) - self.parser.set_default('keep_trivial_imports', False) - group.add_option(IncompatibleOption( - '--keep-trivial-imports', - action='store_true', - help=( - 'do not exclude branches that were only used for ' - 'a single import (usually these are unneeded)' - ), - man_help=( - 'Do not exclude branches that were only used for a single ' - 'import. (By default such branches are excluded because they ' - 'are usually created by the inappropriate use of \\fBcvs ' - 'import\\fR.)' - ), - )) - - return group - - def _get_subversion_properties_options_group(self): - group = OptionGroup(self.parser, 'Subversion properties') - group.add_option(ContextOption( - '--username', type='string', - action='store', - help='username for cvs2svn-synthesized commits', - man_help=( - 'Set the default username to \\fIname\\fR when cvs2svn needs ' - 'to generate a commit for which CVS does not record the ' - 'original username. This happens when a branch or tag is ' - 'created. The default is to use no author at all for such ' - 'commits.' - ), - metavar='NAME', - )) - self.parser.set_default('auto_props_files', []) - group.add_option(IncompatibleOption( - '--auto-props', type='string', - action='append', dest='auto_props_files', - help=( - 'set file properties from the auto-props section ' - 'of a file in svn config format' - ), - man_help=( - 'Specify a file in the format of Subversion\'s config file, ' - 'whose [auto-props] section can be used to set arbitrary ' - 'properties on files in the Subversion repository based on ' - 'their filenames. (The [auto-props] section header must be ' - 'present; other sections of the config file, including the ' - 'enable-auto-props setting, are ignored.) Filenames are matched ' - 'to the filename patterns case-insensitively.' - - ), - metavar='FILE', - )) - self.parser.set_default('mime_types_files', []) - group.add_option(IncompatibleOption( - '--mime-types', type='string', - action='append', dest='mime_types_files', - help=( - 'specify an apache-style mime.types file for setting ' - 'svn:mime-type' - ), - man_help=( - 'Specify an apache-style mime.types \\fIfile\\fR for setting ' - 'svn:mime-type.' - ), - metavar='FILE', - )) - self.parser.set_default('eol_from_mime_type', False) - group.add_option(IncompatibleOption( - '--eol-from-mime-type', - action='store_true', - help='set svn:eol-style from mime type if known', - man_help=( - 'For files that don\'t have the kb expansion mode but have a ' - 'known mime type, set the eol-style based on the mime type. ' - 'For such files, set svn:eol-style to "native" if the mime type ' - 'begins with "text/", and leave it unset (i.e., no EOL ' - 'translation) otherwise. Files with unknown mime types are ' - 'not affected by this option. This option has no effect ' - 'unless the \\fB--mime-types\\fR option is also specified.' - ), - )) - group.add_option(IncompatibleOption( - '--default-eol', type='choice', - choices=['binary', 'native', 'CRLF', 'LF', 'CR'], - action='store', - help=( - 'default svn:eol-style for non-binary files with ' - 'undetermined mime types. STYLE is "binary" ' - '(default), "native", "CRLF", "LF", or "CR"' - ), - man_help=( - 'Set svn:eol-style to \\fIstyle\\fR for files that don\'t have ' - 'the CVS \'kb\' expansion mode and whose end-of-line ' - 'translation mode hasn\'t been determined by one of the other ' - 'options. \\fIstyle\\fR must be \'binary\' (default), ' - '\'native\', \'CRLF\', \'LF\', or \'CR\'.' - ), - metavar='STYLE', - )) - self.parser.set_default('keywords_off', False) - group.add_option(IncompatibleOption( - '--keywords-off', - action='store_true', - help=( - 'don\'t set svn:keywords on any files (by default, ' - 'cvs2svn sets svn:keywords on non-binary files to "%s")' - % (config.SVN_KEYWORDS_VALUE,) - ), - man_help=( - 'By default, cvs2svn sets svn:keywords on CVS files to "author ' - 'id date" if the mode of the RCS file in question is either kv, ' - 'kvl or unset. If you use the --keywords-off switch, cvs2svn ' - 'will not set svn:keywords for any file. While this will not ' - 'touch the keywords in the contents of your files, Subversion ' - 'will not expand them.' - ), - )) - group.add_option(ContextOption( - '--keep-cvsignore', - action='store_true', - help=( - 'keep .cvsignore files (in addition to creating ' - 'the analogous svn:ignore properties)' - ), - man_help=( - 'Include \\fI.cvsignore\\fR files in the output. (Normally ' - 'they are unneeded because cvs2svn sets the corresponding ' - '\\fIsvn:ignore\\fR properties.)' - ), - )) - group.add_option(IncompatibleOption( - '--cvs-revnums', - action='callback', callback=self.callback_cvs_revnums, - help='record CVS revision numbers as file properties', - man_help=( - 'Record CVS revision numbers as file properties in the ' - 'Subversion repository. (Note that unless it is removed ' - 'explicitly, the last CVS revision number will remain ' - 'associated with the file even after the file is changed ' - 'within Subversion.)' - ), - )) - - # Deprecated options: - group.add_option(IncompatibleOption( - '--no-default-eol', - action='store_const', dest='default_eol', const=None, - help=optparse.SUPPRESS_HELP, - man_help=optparse.SUPPRESS_HELP, - )) - self.parser.set_default('auto_props_ignore_case', True) - # True is the default now, so this option has no effect: - group.add_option(IncompatibleOption( - '--auto-props-ignore-case', - action='store_true', - help=optparse.SUPPRESS_HELP, - man_help=optparse.SUPPRESS_HELP, - )) - - return group - - def _get_extraction_options_group(self): - group = OptionGroup(self.parser, 'Extraction options') - - return group - - def _get_environment_options_group(self): - group = OptionGroup(self.parser, 'Environment options') - group.add_option(ContextOption( - '--tmpdir', type='string', - action='store', - help=( - 'directory to use for temporary data files ' - '(default "cvs2svn-tmp")' - ), - man_help=( - 'Set the \\fIpath\\fR to use for temporary data. Default ' - 'is a directory called \\fIcvs2svn-tmp\\fR under the current ' - 'directory.' - ), - metavar='PATH', - )) - self.parser.set_default('co_executable', config.CO_EXECUTABLE) - group.add_option(IncompatibleOption( - '--co', type='string', - action='store', dest='co_executable', - help='path to the "co" program (required if --use-rcs)', - man_help=( - 'Path to the \\fIco\\fR program. (\\fIco\\fR is needed if the ' - '\\fB--use-rcs\\fR option is used.)' - ), - metavar='PATH', - )) - self.parser.set_default('cvs_executable', config.CVS_EXECUTABLE) - group.add_option(IncompatibleOption( - '--cvs', type='string', - action='store', dest='cvs_executable', - help='path to the "cvs" program (required if --use-cvs)', - man_help=( - 'Path to the \\fIcvs\\fR program. (\\fIcvs\\fR is needed if the ' - '\\fB--use-cvs\\fR option is used.)' - ), - metavar='PATH', - )) - group.add_option(ContextOption( - '--sort', type='string', - action='store', dest='sort_executable', - compatible_with_option=True, - help='path to the GNU "sort" program', - man_help=( - 'Path to the GNU \\fIsort\\fR program. (cvs2svn requires GNU ' - 'sort.)' - ), - metavar='PATH', - )) - - return group - - def _get_partial_conversion_options_group(self): - group = OptionGroup(self.parser, 'Partial conversions') - group.add_option(ManOption( - '--pass', type='string', - action='callback', callback=self.callback_passes, - help='execute only specified PASS of conversion', - man_help=( - 'Execute only pass \\fIpass\\fR of the conversion. ' - '\\fIpass\\fR can be specified by name or by number (see ' - '\\fB--help-passes\\fR).' - ), - metavar='PASS', - )) - group.add_option(ManOption( - '--passes', '-p', type='string', - action='callback', callback=self.callback_passes, - help=( - 'execute passes START through END, inclusive (PASS, ' - 'START, and END can be pass names or numbers)' - ), - man_help=( - 'Execute passes \\fIstart\\fR through \\fIend\\fR of the ' - 'conversion (inclusive). \\fIstart\\fR and \\fIend\\fR can be ' - 'specified by name or by number (see \\fB--help-passes\\fR). ' - 'If \\fIstart\\fR or \\fIend\\fR is missing, it defaults to ' - 'the first or last pass, respectively. For this to work the ' - 'earlier passes must have been completed before on the ' - 'same CVS repository, and the generated data files must be ' - 'in the temporary directory (see \\fB--tmpdir\\fR).' - ), - metavar='[START]:[END]', - )) - - return group - - def _get_information_options_group(self): - group = OptionGroup(self.parser, 'Information options') - group.add_option(ManOption( - '--version', - action='callback', callback=self.callback_version, - help='print the version number', - man_help='Print the version number.', - )) - group.add_option(ManOption( - '--help', '-h', - action="help", - help='print this usage message and exit with success', - man_help='Print the usage message and exit with success.', - )) - group.add_option(ManOption( - '--help-passes', - action='callback', callback=self.callback_help_passes, - help='list the available passes and their numbers', - man_help=( - 'Print the numbers and names of the conversion passes and ' - 'exit with success.' - ), - )) - group.add_option(ManOption( - '--man', - action='callback', callback=self.callback_manpage, - help='write the manpage for this program to standard output', - man_help=( - 'Output the unix-style manpage for this program to standard ' - 'output.' - ), - )) - group.add_option(ManOption( - '--verbose', '-v', - action='callback', callback=self.callback_verbose, - help='verbose (may be specified twice for debug output)', - man_help=( - 'Print more information while running. This option may be ' - 'specified twice to output voluminous debugging information.' - ), - )) - group.add_option(ManOption( - '--quiet', '-q', - action='callback', callback=self.callback_quiet, - help='quiet (may be specified twice for very quiet)', - man_help=( - 'Print less information while running. This option may be ' - 'specified twice to suppress all non-error output.' - ), - )) - group.add_option(ContextOption( - '--write-symbol-info', type='string', - action='store', dest='symbol_info_filename', - help='write information and statistics about CVS symbols to PATH.', - man_help=( - 'Write to \\fIpath\\fR symbol statistics and information about ' - 'how symbols were converted during CollateSymbolsPass.' - ), - metavar='PATH', - )) - group.add_option(ContextOption( - '--skip-cleanup', - action='store_true', - help='prevent the deletion of intermediate files', - man_help='Prevent the deletion of temporary files.', - )) - group.add_option(ManOption( - '--profile', - action='callback', callback=self.callback_profile, - help='profile with \'hotshot\' (into file cvs2svn.hotshot)', - man_help=( - 'Profile with \'hotshot\' (into file \\fIcvs2svn.hotshot\\fR).' - ), - )) - - return group - - def callback_options(self, option, opt_str, value, parser): - parser.values.options_file_found = True - self.process_options_file(value) - - def callback_encoding(self, option, opt_str, value, parser): - ctx = Ctx() - - try: - ctx.cvs_author_decoder.add_encoding(value) - ctx.cvs_log_decoder.add_encoding(value) - ctx.cvs_filename_decoder.add_encoding(value) - except LookupError, e: - raise FatalError(str(e)) - - def callback_fallback_encoding(self, option, opt_str, value, parser): - ctx = Ctx() - - try: - ctx.cvs_author_decoder.set_fallback_encoding(value) - ctx.cvs_log_decoder.set_fallback_encoding(value) - # Don't use fallback_encoding for filenames. - except LookupError, e: - raise FatalError(str(e)) - - def callback_help_passes(self, option, opt_str, value, parser): - self.pass_manager.help_passes() - sys.exit(0) - - def callback_manpage(self, option, opt_str, value, parser): - raise NotImplementedError() - - def callback_version(self, option, opt_str, value, parser): - sys.stdout.write( - '%s version %s\n' % (self.progname, VERSION) - ) - sys.exit(0) - - def callback_verbose(self, option, opt_str, value, parser): - Log().increase_verbosity() - - def callback_quiet(self, option, opt_str, value, parser): - Log().decrease_verbosity() - - def callback_passes(self, option, opt_str, value, parser): - if value.find(':') >= 0: - start_pass, end_pass = value.split(':') - self.start_pass = self.pass_manager.get_pass_number(start_pass, 1) - self.end_pass = self.pass_manager.get_pass_number( - end_pass, self.pass_manager.num_passes - ) - else: - self.end_pass = \ - self.start_pass = \ - self.pass_manager.get_pass_number(value) - - def callback_profile(self, option, opt_str, value, parser): - self.profiling = True - - def callback_symbol_hints(self, option, opt_str, value, parser): - parser.values.symbol_strategy_rules.append(SymbolHintsFileRule(value)) - - def callback_force_branch(self, option, opt_str, value, parser): - parser.values.symbol_strategy_rules.append( - ForceBranchRegexpStrategyRule(value) - ) - - def callback_force_tag(self, option, opt_str, value, parser): - parser.values.symbol_strategy_rules.append( - ForceTagRegexpStrategyRule(value) - ) - - def callback_exclude(self, option, opt_str, value, parser): - parser.values.symbol_strategy_rules.append( - ExcludeRegexpStrategyRule(value) - ) - - def callback_cvs_revnums(self, option, opt_str, value, parser): - Ctx().svn_property_setters.append(CVSRevisionNumberSetter()) - - def callback_symbol_transform(self, option, opt_str, value, parser): - [pattern, replacement] = value.split(":") - try: - parser.values.symbol_transforms.append( - RegexpSymbolTransform(pattern, replacement) - ) - except re.error: - raise FatalError("'%s' is not a valid regexp." % (pattern,)) - - def process_symbol_strategy_options(self): - """Process symbol strategy-related options.""" - - ctx = Ctx() - options = self.options - - # Add the standard symbol name cleanup rules: - self.options.symbol_transforms.extend([ - ReplaceSubstringsSymbolTransform('\\','/'), - # Remove leading, trailing, and repeated slashes: - NormalizePathsSymbolTransform(), - ]) - - if ctx.trunk_only: - if options.symbol_strategy_rules or options.keep_trivial_imports: - raise SymbolOptionsWithTrunkOnlyException() - - else: - if not options.keep_trivial_imports: - options.symbol_strategy_rules.append(ExcludeTrivialImportBranchRule()) - - options.symbol_strategy_rules.append(UnambiguousUsageRule()) - if options.symbol_default == 'strict': - pass - elif options.symbol_default == 'branch': - options.symbol_strategy_rules.append(AllBranchRule()) - elif options.symbol_default == 'tag': - options.symbol_strategy_rules.append(AllTagRule()) - elif options.symbol_default == 'heuristic': - options.symbol_strategy_rules.append(BranchIfCommitsRule()) - options.symbol_strategy_rules.append(HeuristicStrategyRule()) - else: - assert False - - # Now add a rule whose job it is to pick the preferred parents of - # branches and tags: - options.symbol_strategy_rules.append(HeuristicPreferredParentRule()) - - def process_property_setter_options(self): - """Process the options that set SVN properties.""" - - ctx = Ctx() - options = self.options - - for value in options.auto_props_files: - ctx.svn_property_setters.append( - AutoPropsPropertySetter(value, options.auto_props_ignore_case) - ) - - for value in options.mime_types_files: - ctx.svn_property_setters.append(MimeMapper(value)) - - ctx.svn_property_setters.append(CVSBinaryFileEOLStyleSetter()) - - ctx.svn_property_setters.append(CVSBinaryFileDefaultMimeTypeSetter()) - - if options.eol_from_mime_type: - ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter()) - - ctx.svn_property_setters.append( - DefaultEOLStyleSetter(options.default_eol) - ) - - ctx.svn_property_setters.append(SVNBinaryFileKeywordsPropertySetter()) - - if not options.keywords_off: - ctx.svn_property_setters.append( - KeywordsPropertySetter(config.SVN_KEYWORDS_VALUE)) - - ctx.svn_property_setters.append(ExecutablePropertySetter()) - - def process_options(self): - """Do the main configuration based on command-line options. - - This method is only called if the --options option was not - specified.""" - - raise NotImplementedError() - - def check_options(self): - """Check the the run options are OK. - - This should only be called after all options have been processed.""" - - # Convenience var, so we don't have to keep instantiating this Borg. - ctx = Ctx() - - if not self.start_pass <= self.end_pass: - raise InvalidPassError( - 'Ending pass must not come before starting pass.') - - if not ctx.dry_run and ctx.output_option is None: - raise FatalError('No output option specified.') - - if ctx.output_option is not None: - ctx.output_option.check() - - if not self.projects: - raise FatalError('No project specified.') - - def verify_option_compatibility(self): - """Verify that no options incompatible with --options were used. - - The --options option was specified. Verify that no incompatible - options or arguments were specified.""" - - if self.options.options_incompatible_options or self.args: - if self.options.options_incompatible_options: - oio = self.options.options_incompatible_options - Log().error( - '%s: The following options cannot be used in combination with ' - 'the --options\n' - 'option:\n' - ' %s\n' - % (error_prefix, '\n '.join(oio)) - ) - if self.args: - Log().error( - '%s: No cvs-repos-path arguments are allowed with the --options ' - 'option.\n' - % (error_prefix,) - ) - sys.exit(1) - - def process_options_file(self, options_filename): - """Read options from the file named OPTIONS_FILENAME. - - Store the run options to SELF.""" - - g = { - 'ctx' : Ctx(), - 'run_options' : self, - } - execfile(options_filename, g) - - def usage(self): - self.parser.print_help() - - diff --git a/cvs2svn_lib/serializer.py b/cvs2svn_lib/serializer.py deleted file mode 100644 index 24bd81c..0000000 --- a/cvs2svn_lib/serializer.py +++ /dev/null @@ -1,146 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Picklers and unpicklers that are primed with known objects.""" - - -import cStringIO -import marshal -import cPickle -import zlib - - -class Serializer: - """An object able to serialize/deserialize some class of objects.""" - - def dumpf(self, f, object): - """Serialize OBJECT to file-like object F.""" - - raise NotImplementedError() - - def dumps(self, object): - """Return a string containing OBJECT in serialized form.""" - - raise NotImplementedError() - - def loadf(self, f): - """Return the next object deserialized from file-like object F.""" - - raise NotImplementedError() - - def loads(self, s): - """Return the object deserialized from string S.""" - - raise NotImplementedError() - - -class MarshalSerializer(Serializer): - """This class uses the marshal module to serialize/deserialize. - - This means that it shares the limitations of the marshal module, - namely only being able to serialize a few simple python data types - without reference loops.""" - - def dumpf(self, f, object): - marshal.dump(object, f) - - def dumps(self, object): - return marshal.dumps(object) - - def loadf(self, f): - return marshal.load(f) - - def loads(self, s): - return marshal.loads(s) - - -class PrimedPickleSerializer(Serializer): - """This class acts as a pickler/unpickler with a pre-initialized memo. - - The picklers and unpicklers are 'pre-trained' to recognize the - objects that are in the primer. If objects are recognized - from PRIMER, then only their persistent IDs need to be pickled - instead of the whole object. (Note that the memos needed for - pickling and unpickling are different.) - - A new pickler/unpickler is created for each use, each time with the - memo initialized appropriately for pickling or unpickling.""" - - def __init__(self, primer): - """Prepare to make picklers/unpicklers with the specified primer. - - The Pickler and Unpickler are 'primed' by pre-pickling PRIMER, - which can be an arbitrary object (e.g., a list of objects that are - expected to occur frequently in the objects to be serialized).""" - - f = cStringIO.StringIO() - pickler = cPickle.Pickler(f, -1) - pickler.dump(primer) - self.pickler_memo = pickler.memo - - unpickler = cPickle.Unpickler(cStringIO.StringIO(f.getvalue())) - unpickler.load() - self.unpickler_memo = unpickler.memo - - def dumpf(self, f, object): - """Serialize OBJECT to file-like object F.""" - - pickler = cPickle.Pickler(f, -1) - pickler.memo = self.pickler_memo.copy() - pickler.dump(object) - - def dumps(self, object): - """Return a string containing OBJECT in serialized form.""" - - f = cStringIO.StringIO() - self.dumpf(f, object) - return f.getvalue() - - def loadf(self, f): - """Return the next object deserialized from file-like object F.""" - - unpickler = cPickle.Unpickler(f) - unpickler.memo = self.unpickler_memo.copy() - return unpickler.load() - - def loads(self, s): - """Return the object deserialized from string S.""" - - return self.loadf(cStringIO.StringIO(s)) - - -class CompressingSerializer(Serializer): - """This class wraps other Serializers to compress their serialized data.""" - - def __init__(self, wrapee): - """Constructor. WRAPEE is the Serializer whose bitstream ought to be - compressed.""" - - self.wrapee = wrapee - - def dumpf(self, f, object): - marshal.dump(zlib.compress(self.wrapee.dumps(object), 9), f) - - def dumps(self, object): - return marshal.dumps(zlib.compress(self.wrapee.dumps(object), 9)) - - def loadf(self, f): - return self.wrapee.loads(zlib.decompress(marshal.load(f))) - - def loads(self, s): - return self.wrapee.loads(zlib.decompress(marshal.loads(s))) - - diff --git a/cvs2svn_lib/stats_keeper.py b/cvs2svn_lib/stats_keeper.py deleted file mode 100644 index 1a82540..0000000 --- a/cvs2svn_lib/stats_keeper.py +++ /dev/null @@ -1,189 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains the StatsKeeper class. - -A StatsKeeper can pickle itself to a STATISTICS_FILE. This module -also includes a function to read a StatsKeeper from a STATISTICS_FILE.""" - - -import time -import cPickle -from cStringIO import StringIO - -from cvs2svn_lib.cvs_item import CVSRevision -from cvs2svn_lib.cvs_item import CVSBranch -from cvs2svn_lib.cvs_item import CVSTag - - -class StatsKeeper: - def __init__(self): - self._svn_rev_count = None - self._first_rev_date = 1L<<32 - self._last_rev_date = 0 - self._pass_timings = { } - self._stats_reflect_exclude = False - self.reset_cvs_rev_info() - - def log_duration_for_pass(self, duration, pass_num, pass_name): - self._pass_timings[pass_num] = (pass_name, duration,) - - def set_stats_reflect_exclude(self, value): - self._stats_reflect_exclude = value - - def reset_cvs_rev_info(self): - self._repos_file_count = 0 - self._repos_size = 0 - self._cvs_revs_count = 0 - self._cvs_branches_count = 0 - self._cvs_tags_count = 0 - - # A set of tag_ids seen: - self._tag_ids = set() - - # A set of branch_ids seen: - self._branch_ids = set() - - def record_cvs_file(self, cvs_file): - self._repos_file_count += 1 - self._repos_size += cvs_file.file_size - - def _record_cvs_rev(self, cvs_rev): - self._cvs_revs_count += 1 - - if cvs_rev.timestamp < self._first_rev_date: - self._first_rev_date = cvs_rev.timestamp - - if cvs_rev.timestamp > self._last_rev_date: - self._last_rev_date = cvs_rev.timestamp - - def _record_cvs_branch(self, cvs_branch): - self._cvs_branches_count += 1 - self._branch_ids.add(cvs_branch.symbol.id) - - def _record_cvs_tag(self, cvs_tag): - self._cvs_tags_count += 1 - self._tag_ids.add(cvs_tag.symbol.id) - - def record_cvs_item(self, cvs_item): - if isinstance(cvs_item, CVSRevision): - self._record_cvs_rev(cvs_item) - elif isinstance(cvs_item, CVSBranch): - self._record_cvs_branch(cvs_item) - elif isinstance(cvs_item, CVSTag): - self._record_cvs_tag(cvs_item) - else: - raise RuntimeError('Unknown CVSItem type') - - def set_svn_rev_count(self, count): - self._svn_rev_count = count - - def svn_rev_count(self): - return self._svn_rev_count - - def __getstate__(self): - state = self.__dict__.copy() - # This can get kinda large, so we don't store it: - return state - - def archive(self, filename): - f = open(filename, 'wb') - cPickle.dump(self, f) - f.close() - - def __str__(self): - f = StringIO() - f.write('\n') - f.write('cvs2svn Statistics:\n') - f.write('------------------\n') - f.write('Total CVS Files: %10i\n' % (self._repos_file_count,)) - f.write('Total CVS Revisions: %10i\n' % (self._cvs_revs_count,)) - f.write('Total CVS Branches: %10i\n' % (self._cvs_branches_count,)) - f.write('Total CVS Tags: %10i\n' % (self._cvs_tags_count,)) - f.write('Total Unique Tags: %10i\n' % (len(self._tag_ids),)) - f.write('Total Unique Branches: %10i\n' % (len(self._branch_ids),)) - f.write('CVS Repos Size in KB: %10i\n' % ((self._repos_size / 1024),)) - - if self._svn_rev_count is not None: - f.write('Total SVN Commits: %10i\n' % self._svn_rev_count) - - f.write( - 'First Revision Date: %s\n' % (time.ctime(self._first_rev_date),) - ) - f.write( - 'Last Revision Date: %s\n' % (time.ctime(self._last_rev_date),) - ) - f.write('------------------') - - if not self._stats_reflect_exclude: - f.write( - '\n' - '(These are unaltered CVS repository stats and do not\n' - ' reflect tags or branches excluded via --exclude)\n' - ) - - return f.getvalue() - - @staticmethod - def _get_timing_format(value): - # Output times with up to 3 decimal places: - decimals = max(0, 4 - len('%d' % int(value))) - length = len(('%%.%df' % decimals) % value) - return '%%%d.%df' % (length, decimals,) - - def single_pass_timing(self, pass_num): - (pass_name, duration,) = self._pass_timings[pass_num] - format = self._get_timing_format(duration) - time_string = format % (duration,) - return ( - 'Time for pass%d (%s): %s seconds.' - % (pass_num, pass_name, time_string,) - ) - - def timings(self): - passes = self._pass_timings.keys() - passes.sort() - f = StringIO() - f.write('Timings (seconds):\n') - f.write('------------------\n') - - total = 0.0 - for pass_num in passes: - (pass_name, duration,) = self._pass_timings[pass_num] - total += duration - - format = self._get_timing_format(total) - - for pass_num in passes: - (pass_name, duration,) = self._pass_timings[pass_num] - f.write( - (format + ' pass%-2d %s\n') % (duration, pass_num, pass_name,) - ) - - f.write((format + ' total') % total) - return f.getvalue() - - -def read_stats_keeper(filename): - """Factory function: Return a _StatsKeeper instance. - - Read the instance from FILENAME as written by StatsKeeper.archive().""" - - f = open(filename, 'rb') - retval = cPickle.load(f) - f.close() - return retval - diff --git a/cvs2svn_lib/stdout_delegate.py b/cvs2svn_lib/stdout_delegate.py deleted file mode 100644 index 2b4e228..0000000 --- a/cvs2svn_lib/stdout_delegate.py +++ /dev/null @@ -1,107 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains database facilities used by cvs2svn.""" - - -from cvs2svn_lib.log import Log -from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate - - -class StdoutDelegate(SVNRepositoryDelegate): - """Makes no changes to the disk, but writes out information to - STDOUT about what is happening in the SVN output. Of course, our - print statements will state that we're doing something, when in - reality, we aren't doing anything other than printing out that we're - doing something. Kind of zen, really.""" - - def __init__(self, total_revs): - self.total_revs = total_revs - - def start_commit(self, revnum, revprops): - """Prints out the Subversion revision number of the commit that is - being started.""" - - Log().verbose("=" * 60) - Log().normal("Starting Subversion r%d / %d" % (revnum, self.total_revs)) - - def end_commit(self): - pass - - def initialize_project(self, project): - Log().verbose(" Initializing project %s" % (project,)) - - def initialize_lod(self, lod): - Log().verbose(" Initializing %s" % (lod,)) - - def mkdir(self, lod, cvs_directory): - Log().verbose( - " New Directory %s" % (lod.get_path(cvs_directory.cvs_path),) - ) - - def add_path(self, s_item): - """Print a line stating what path we are 'adding'.""" - - Log().verbose(" Adding %s" % (s_item.cvs_rev.get_svn_path(),)) - - def change_path(self, s_item): - """Print a line stating what path we are 'changing'.""" - - Log().verbose(" Changing %s" % (s_item.cvs_rev.get_svn_path(),)) - - def delete_lod(self, lod): - """Print a line stating that we are 'deleting' LOD.""" - - Log().verbose(" Deleting %s" % (lod.get_path(),)) - - def delete_path(self, lod, cvs_path): - """Print a line stating that we are 'deleting' PATH.""" - - Log().verbose(" Deleting %s" % (lod.get_path(cvs_path.cvs_path),)) - - def _show_copy(self, src_path, dest_path, src_revnum): - """Print a line stating that we are 'copying' revision SRC_REVNUM - of SRC_PATH to DEST_PATH.""" - - Log().verbose( - " Copying revision %d of %s\n" - " to %s\n" - % (src_revnum, src_path, dest_path,) - ) - - def copy_lod(self, src_lod, dest_lod, src_revnum): - """Print a line stating that we are 'copying' revision SRC_REVNUM - of SRC_PATH to DEST_PATH.""" - - self._show_copy(src_lod.get_path(), dest_lod.get_path(), src_revnum) - - def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum): - """Print a line stating that we are 'copying' revision SRC_REVNUM - of CVS_PATH from SRC_LOD to DEST_LOD.""" - - self._show_copy( - src_lod.get_path(cvs_path.cvs_path), - dest_lod.get_path(cvs_path.cvs_path), - src_revnum, - ) - - def finish(self): - """State that we are done creating our repository.""" - - Log().verbose("Finished creating Subversion repository.") - Log().quiet("Done.") - - diff --git a/cvs2svn_lib/svn_commit.py b/cvs2svn_lib/svn_commit.py deleted file mode 100644 index 25dc38e..0000000 --- a/cvs2svn_lib/svn_commit.py +++ /dev/null @@ -1,381 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains the SVNCommit classes. - -There are five types of SVNCommits: - - SVNInitialProjectCommit -- Initializes a project (creates its trunk, - branches, and tags directories). - - SVNPrimaryCommit -- Commits one or more CVSRevisions on one or more - lines of development. - - SVNBranchCommit -- Creates or fills a branch; that is, copies files - from a source line of development to a target branch. - - SVNTagCommit -- Creates or fills a tag; that is, copies files from a - source line of development to a target tag. - - SVNPostCommit -- Updates trunk to reflect changes on a non-trunk - default branch. - -""" - - -import textwrap - -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.symbol import Branch -from cvs2svn_lib.symbol import Tag - - -class SVNCommit: - """This represents one commit to the Subversion Repository.""" - - # textwrap.TextWrapper instance to be used for wrapping log messages: - text_wrapper = textwrap.TextWrapper(width=76) - - def __init__(self, date, revnum): - """Instantiate an SVNCommit. - - REVNUM is the SVN revision number of this commit.""" - - # The date of the commit, as an integer. While the SVNCommit is - # being built up, this contains the latest date seen so far. This - # member is set externally. - self.date = date - - # The SVN revision number of this commit, as an integer. - self.revnum = revnum - - def __getstate__(self): - return (self.date, self.revnum,) - - def __setstate__(self, state): - (self.date, self.revnum,) = state - - def get_cvs_items(self): - """Return a list containing the CVSItems in this commit.""" - - raise NotImplementedError() - - def get_author(self): - """Return the author or this commit, or None if none is to be used. - - The return value is exactly as the author appeared in the RCS - file, with undefined character encoding.""" - - raise NotImplementedError() - - def get_log_msg(self): - """Return a log message for this commit. - - The return value is exactly as the log message appeared in the RCS - file, with undefined character encoding.""" - - raise NotImplementedError() - - def get_warning_summary(self): - """Return a summary of this commit that can be used in warnings.""" - - return '(subversion rev %s)' % (self.revnum,) - - def get_description(self): - """Return a partial description of this SVNCommit, for logging.""" - - raise NotImplementedError() - - def output(self, output_option): - """Cause this commit to be output to OUTPUT_OPTION. - - This method is used for double-dispatch. Derived classes should - call the OutputOption.process_*_commit() method appropriate for - the type of SVNCommit.""" - - raise NotImplementedError() - - def __str__(self): - """ Print a human-readable description of this SVNCommit. - - This description is not intended to be machine-parseable.""" - - ret = "SVNCommit #: " + str(self.revnum) + "\n" - ret += " debug description: " + self.get_description() + "\n" - return ret - - -class SVNInitialProjectCommit(SVNCommit): - def __init__(self, date, projects, revnum): - SVNCommit.__init__(self, date, revnum) - self.projects = list(projects) - - def __getstate__(self): - return ( - SVNCommit.__getstate__(self), - [project.id for project in self.projects], - ) - - def __setstate__(self, state): - (svn_commit_state, project_ids,) = state - SVNCommit.__setstate__(self, svn_commit_state) - self.projects = [ - Ctx()._projects[project_id] for project_id in project_ids - ] - - def get_cvs_items(self): - return [] - - def get_author(self): - return Ctx().username - - def get_log_msg(self): - return self.text_wrapper.fill( - Ctx().initial_project_commit_message % {} - ) - - def get_description(self): - return 'Project initialization' - - def output(self, output_option): - output_option.process_initial_project_commit(self) - - -class SVNRevisionCommit(SVNCommit): - """A SVNCommit that includes actual CVS revisions.""" - - def __init__(self, cvs_revs, date, revnum): - SVNCommit.__init__(self, date, revnum) - - self.cvs_revs = list(cvs_revs) - - # This value is set lazily by _get_metadata(): - self._metadata = None - - def __getstate__(self): - """Return the part of the state represented by this mixin.""" - - return ( - SVNCommit.__getstate__(self), - [cvs_rev.id for cvs_rev in self.cvs_revs], - ) - - def __setstate__(self, state): - """Restore the part of the state represented by this mixin.""" - - (svn_commit_state, cvs_rev_ids) = state - SVNCommit.__setstate__(self, svn_commit_state) - - self.cvs_revs = [ - cvs_rev - for (id, cvs_rev) in Ctx()._cvs_items_db.get_many(cvs_rev_ids) - ] - self._metadata = None - - def get_cvs_items(self): - return self.cvs_revs - - def _get_metadata(self): - """Return the Metadata instance for this commit.""" - - if self._metadata is None: - # Set self._metadata for this commit from that of the first cvs - # revision. - if not self.cvs_revs: - raise InternalError('SVNPrimaryCommit contains no CVS revisions') - - metadata_id = self.cvs_revs[0].metadata_id - self._metadata = Ctx()._metadata_db[metadata_id] - - return self._metadata - - def get_author(self): - return self._get_metadata().author - - def get_warning_summary(self): - retval = [] - retval.append(SVNCommit.get_warning_summary(self) + ' Related files:') - for cvs_rev in self.cvs_revs: - retval.append(' ' + cvs_rev.cvs_file.filename) - return '\n'.join(retval) - - def __str__(self): - """Return the revision part of a description of this SVNCommit. - - Derived classes should append the output of this method to the - output of SVNCommit.__str__().""" - - ret = [] - ret.append(SVNCommit.__str__(self)) - ret.append(' cvs_revs:\n') - for cvs_rev in self.cvs_revs: - ret.append(' %x\n' % (cvs_rev.id,)) - return ''.join(ret) - - -class SVNPrimaryCommit(SVNRevisionCommit): - def __init__(self, cvs_revs, date, revnum): - SVNRevisionCommit.__init__(self, cvs_revs, date, revnum) - - def get_log_msg(self): - """Return the actual log message for this commit.""" - - return self._get_metadata().log_msg - - def get_description(self): - return 'commit' - - def output(self, output_option): - output_option.process_primary_commit(self) - - -class SVNPostCommit(SVNRevisionCommit): - def __init__(self, motivating_revnum, cvs_revs, date, revnum): - SVNRevisionCommit.__init__(self, cvs_revs, date, revnum) - - # The subversion revision number of the *primary* commit where the - # default branch changes actually happened. (NOTE: Secondary - # commits that fill branches and tags also have a motivating - # commit, but we do not record it because it is (currently) not - # needed for anything.) motivating_revnum is used when generating - # the log message for the commit that synchronizes the default - # branch with trunk. - # - # It is possible for multiple synchronization commits to refer to - # the same motivating commit revision number, and it is possible - # for a single synchronization commit to contain CVSRevisions on - # multiple different default branches. - self.motivating_revnum = motivating_revnum - - def __getstate__(self): - return ( - SVNRevisionCommit.__getstate__(self), - self.motivating_revnum, - ) - - def __setstate__(self, state): - (rev_state, self.motivating_revnum,) = state - SVNRevisionCommit.__setstate__(self, rev_state) - - def get_cvs_items(self): - # It might seem that we should return - # SVNRevisionCommit.get_cvs_items(self) here, but this commit - # doesn't really include those CVSItems, but rather followup - # commits to those. - return [] - - def get_log_msg(self): - """Return a manufactured log message for this commit.""" - - return self.text_wrapper.fill( - Ctx().post_commit_message % {'revnum' : self.motivating_revnum} - ) - - def get_description(self): - return 'post-commit default branch(es)' - - def output(self, output_option): - output_option.process_post_commit(self) - - -class SVNSymbolCommit(SVNCommit): - def __init__(self, symbol, cvs_symbol_ids, date, revnum): - SVNCommit.__init__(self, date, revnum) - - # The TypedSymbol that is filled in this SVNCommit. - self.symbol = symbol - - self.cvs_symbol_ids = cvs_symbol_ids - - def __getstate__(self): - return ( - SVNCommit.__getstate__(self), - self.symbol.id, self.cvs_symbol_ids, - ) - - def __setstate__(self, state): - (svn_commit_state, symbol_id, self.cvs_symbol_ids) = state - SVNCommit.__setstate__(self, svn_commit_state) - self.symbol = Ctx()._symbol_db.get_symbol(symbol_id) - - def get_cvs_items(self): - return [ - cvs_symbol - for (id, cvs_symbol) - in Ctx()._cvs_items_db.get_many(self.cvs_symbol_ids) - ] - - def _get_symbol_type(self): - """Return the type of the self.symbol ('branch' or 'tag').""" - - raise NotImplementedError() - - def get_author(self): - return Ctx().username - - def get_log_msg(self): - """Return a manufactured log message for this commit.""" - - return self.text_wrapper.fill( - Ctx().symbol_commit_message % { - 'symbol_type' : self._get_symbol_type(), - 'symbol_name' : self.symbol.name, - } - ) - - def get_description(self): - return 'copying to %s %r' % (self._get_symbol_type(), self.symbol.name,) - - def __str__(self): - """ Print a human-readable description of this SVNCommit. - - This description is not intended to be machine-parseable.""" - - return ( - SVNCommit.__str__(self) - + " symbolic name: %s\n" % (self.symbol.name,) - ) - - -class SVNBranchCommit(SVNSymbolCommit): - def __init__(self, symbol, cvs_symbol_ids, date, revnum): - if not isinstance(symbol, Branch): - raise InternalError('Incorrect symbol type %r' % (symbol,)) - - SVNSymbolCommit.__init__(self, symbol, cvs_symbol_ids, date, revnum) - - def _get_symbol_type(self): - return 'branch' - - def output(self, output_option): - output_option.process_branch_commit(self) - - -class SVNTagCommit(SVNSymbolCommit): - def __init__(self, symbol, cvs_symbol_ids, date, revnum): - if not isinstance(symbol, Tag): - raise InternalError('Incorrect symbol type %r' % (symbol,)) - - SVNSymbolCommit.__init__(self, symbol, cvs_symbol_ids, date, revnum) - - def _get_symbol_type(self): - return 'tag' - - def output(self, output_option): - output_option.process_tag_commit(self) - - diff --git a/cvs2svn_lib/svn_commit_creator.py b/cvs2svn_lib/svn_commit_creator.py deleted file mode 100644 index c87db38..0000000 --- a/cvs2svn_lib/svn_commit_creator.py +++ /dev/null @@ -1,217 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains the SVNCommitCreator class.""" - - -import time - -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.log import Log -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.cvs_item import CVSRevisionNoop -from cvs2svn_lib.cvs_item import CVSBranchNoop -from cvs2svn_lib.cvs_item import CVSTagNoop -from cvs2svn_lib.changeset import OrderedChangeset -from cvs2svn_lib.changeset import BranchChangeset -from cvs2svn_lib.changeset import TagChangeset -from cvs2svn_lib.svn_commit import SVNInitialProjectCommit -from cvs2svn_lib.svn_commit import SVNPrimaryCommit -from cvs2svn_lib.svn_commit import SVNPostCommit -from cvs2svn_lib.svn_commit import SVNBranchCommit -from cvs2svn_lib.svn_commit import SVNTagCommit -from cvs2svn_lib.key_generator import KeyGenerator - - -class SVNCommitCreator: - """This class creates and yields SVNCommits via process_changeset().""" - - def __init__(self): - # The revision number to assign to the next new SVNCommit. - self.revnum_generator = KeyGenerator() - - # A set containing the Projects that have already been - # initialized: - self._initialized_projects = set() - - def _post_commit(self, cvs_revs, motivating_revnum, timestamp): - """Generate any SVNCommits needed to follow CVS_REVS. - - That is, handle non-trunk default branches. A revision on a CVS - non-trunk default branch is visible in a default CVS checkout of - HEAD. So we copy such commits over to Subversion's trunk so that - checking out SVN trunk gives the same output as checking out of - CVS's default branch.""" - - cvs_revs = [ - cvs_rev - for cvs_rev in cvs_revs - if cvs_rev.ntdbr and not isinstance(cvs_rev, CVSRevisionNoop) - ] - - if cvs_revs: - cvs_revs.sort( - lambda a, b: cmp(a.cvs_file.filename, b.cvs_file.filename) - ) - # Generate an SVNCommit for all of our default branch cvs_revs. - yield SVNPostCommit( - motivating_revnum, cvs_revs, timestamp, - self.revnum_generator.gen_id(), - ) - - def _process_revision_changeset(self, changeset, timestamp): - """Process CHANGESET, using TIMESTAMP as the commit time. - - Create and yield one or more SVNCommits in the process. CHANGESET - must be an OrderedChangeset. TIMESTAMP is used as the timestamp - for any resulting SVNCommits.""" - - if not changeset.cvs_item_ids: - Log().warn('Changeset has no items: %r' % changeset) - return - - Log().verbose('-' * 60) - Log().verbose('CVS Revision grouping:') - Log().verbose(' Time: %s' % time.ctime(timestamp)) - - # Generate an SVNCommit unconditionally. Even if the only change in - # this group of CVSRevisions is a deletion of an already-deleted - # file (that is, a CVS revision in state 'dead' whose predecessor - # was also in state 'dead'), the conversion will still generate a - # Subversion revision containing the log message for the second dead - # revision, because we don't want to lose that information. - - cvs_revs = list(changeset.iter_cvs_items()) - if cvs_revs: - cvs_revs.sort(lambda a, b: cmp(a.cvs_file.filename, b.cvs_file.filename)) - svn_commit = SVNPrimaryCommit( - cvs_revs, timestamp, self.revnum_generator.gen_id() - ) - - yield svn_commit - - for cvs_rev in cvs_revs: - Ctx()._symbolings_logger.log_revision(cvs_rev, svn_commit.revnum) - - # Generate an SVNPostCommit if we have default branch revs. If - # some of the revisions in this commit happened on a non-trunk - # default branch, then those files have to be copied into trunk - # manually after being changed on the branch (because the RCS - # "default branch" appears as head, i.e., trunk, in practice). - # Unfortunately, Subversion doesn't support copies with sources - # in the current txn. All copies must be based in committed - # revisions. Therefore, we generate the copies in a new - # revision. - for svn_post_commit in self._post_commit( - cvs_revs, svn_commit.revnum, timestamp - ): - yield svn_post_commit - - def _process_tag_changeset(self, changeset, timestamp): - """Process TagChangeset CHANGESET, producing a SVNTagCommit. - - Filter out CVSTagNoops. If no CVSTags are left, don't generate a - SVNTagCommit.""" - - if Ctx().trunk_only: - raise InternalError( - 'TagChangeset encountered during a --trunk-only conversion') - - cvs_tag_ids = [ - cvs_tag.id - for cvs_tag in changeset.iter_cvs_items() - if not isinstance(cvs_tag, CVSTagNoop) - ] - if cvs_tag_ids: - yield SVNTagCommit( - changeset.symbol, cvs_tag_ids, timestamp, - self.revnum_generator.gen_id(), - ) - else: - Log().debug( - 'Omitting %r because it contains only CVSTagNoops' % (changeset,) - ) - - def _process_branch_changeset(self, changeset, timestamp): - """Process BranchChangeset CHANGESET, producing a SVNBranchCommit. - - Filter out CVSBranchNoops. If no CVSBranches are left, don't - generate a SVNBranchCommit.""" - - if Ctx().trunk_only: - raise InternalError( - 'BranchChangeset encountered during a --trunk-only conversion') - - cvs_branches = [ - cvs_branch - for cvs_branch in changeset.iter_cvs_items() - if not isinstance(cvs_branch, CVSBranchNoop) - ] - if cvs_branches: - svn_commit = SVNBranchCommit( - changeset.symbol, - [cvs_branch.id for cvs_branch in cvs_branches], - timestamp, - self.revnum_generator.gen_id(), - ) - yield svn_commit - for cvs_branch in cvs_branches: - Ctx()._symbolings_logger.log_branch_revision( - cvs_branch, svn_commit.revnum - ) - else: - Log().debug( - 'Omitting %r because it contains only CVSBranchNoops' % (changeset,) - ) - - def process_changeset(self, changeset, timestamp): - """Process CHANGESET, using TIMESTAMP for all of its entries. - - Return a generator that generates the resulting SVNCommits. - - The changesets must be fed to this function in proper dependency - order.""" - - # First create any new projects that might be opened by the - # changeset: - projects_opened = \ - changeset.get_projects_opened() - self._initialized_projects - if projects_opened: - if Ctx().cross_project_commits: - yield SVNInitialProjectCommit( - timestamp, projects_opened, self.revnum_generator.gen_id() - ) - else: - for project in projects_opened: - yield SVNInitialProjectCommit( - timestamp, [project], self.revnum_generator.gen_id() - ) - self._initialized_projects.update(projects_opened) - - if isinstance(changeset, OrderedChangeset): - for svn_commit \ - in self._process_revision_changeset(changeset, timestamp): - yield svn_commit - elif isinstance(changeset, TagChangeset): - for svn_commit in self._process_tag_changeset(changeset, timestamp): - yield svn_commit - elif isinstance(changeset, BranchChangeset): - for svn_commit in self._process_branch_changeset(changeset, timestamp): - yield svn_commit - else: - raise TypeError('Illegal changeset %r' % changeset) - - diff --git a/cvs2svn_lib/svn_commit_item.py b/cvs2svn_lib/svn_commit_item.py deleted file mode 100644 index 8bc9015..0000000 --- a/cvs2svn_lib/svn_commit_item.py +++ /dev/null @@ -1,50 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains class SVNCommitItem.""" - - -from cvs2svn_lib.context import Ctx - - -class SVNCommitItem: - """A wrapper class for CVSRevision objects upon which - Subversion-related data (such as properties) may be hung.""" - - def __init__(self, cvs_rev, svn_props_changed): - """Initialize instance and record the properties for this file. - SVN_PROPS_CHANGED indicates whether the svn: properties are known - to have changed since the last revision. - - The properties are set by the SVNPropertySetters in - Ctx().svn_property_setters.""" - - self.cvs_rev = cvs_rev - # Did the svn properties change for this file (i.e., do they have - # to be written to the dumpfile?) - self.svn_props_changed = svn_props_changed - - # The properties for this item as a map { key : value }. If VALUE - # is None, the property should be left unset. - self.svn_props = { } - - for svn_property_setter in Ctx().svn_property_setters: - svn_property_setter.set_properties(self) - - def has_keywords(self): - return bool(self.svn_props.get('svn:keywords', None)) - - diff --git a/cvs2svn_lib/svn_output_option.py b/cvs2svn_lib/svn_output_option.py deleted file mode 100644 index 86d1ba4..0000000 --- a/cvs2svn_lib/svn_output_option.py +++ /dev/null @@ -1,753 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Classes for outputting the converted repository to SVN.""" - - -import os - -from cvs2svn_lib import config -from cvs2svn_lib.common import InternalError -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import FatalException -from cvs2svn_lib.common import error_prefix -from cvs2svn_lib.common import format_date -from cvs2svn_lib.common import PathsNotDisjointException -from cvs2svn_lib.common import verify_paths_disjoint -from cvs2svn_lib.log import Log -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.artifact_manager import artifact_manager -from cvs2svn_lib.process import CommandFailedException -from cvs2svn_lib.process import check_command_runs -from cvs2svn_lib.process import call_command -from cvs2svn_lib.cvs_file import CVSDirectory -from cvs2svn_lib.symbol import Trunk -from cvs2svn_lib.symbol import LineOfDevelopment -from cvs2svn_lib.cvs_item import CVSRevisionAdd -from cvs2svn_lib.cvs_item import CVSRevisionChange -from cvs2svn_lib.cvs_item import CVSRevisionDelete -from cvs2svn_lib.cvs_item import CVSRevisionNoop -from cvs2svn_lib.repository_mirror import RepositoryMirror -from cvs2svn_lib.repository_mirror import PathExistsError -from cvs2svn_lib.svn_commit_item import SVNCommitItem -from cvs2svn_lib.openings_closings import SymbolingsReader -from cvs2svn_lib.fill_source import get_source_set -from cvs2svn_lib.stdout_delegate import StdoutDelegate -from cvs2svn_lib.dumpfile_delegate import DumpfileDelegate -from cvs2svn_lib.repository_delegate import RepositoryDelegate -from cvs2svn_lib.output_option import OutputOption - - -class SVNOutputOption(OutputOption): - """An OutputOption appropriate for output to Subversion.""" - - class ParentMissingError(Exception): - """The parent of a path is missing. - - Exception raised if an attempt is made to add a path to the - repository mirror but the parent's path doesn't exist in the - youngest revision of the repository.""" - - pass - - class ExpectedDirectoryError(Exception): - """A file was found where a directory was expected.""" - - pass - - def __init__(self, author_transforms=None): - self._mirror = RepositoryMirror() - - def to_utf8(s): - if isinstance(s, unicode): - return s.encode('utf8') - else: - return s - - self.author_transforms = {} - if author_transforms is not None: - for (cvsauthor, name) in author_transforms.iteritems(): - cvsauthor = to_utf8(cvsauthor) - name = to_utf8(name) - self.author_transforms[cvsauthor] = name - - def register_artifacts(self, which_pass): - # These artifacts are needed for SymbolingsReader: - artifact_manager.register_temp_file_needed( - config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass - ) - artifact_manager.register_temp_file_needed( - config.SYMBOL_OFFSETS_DB, which_pass - ) - - self._mirror.register_artifacts(which_pass) - Ctx().revision_reader.register_artifacts(which_pass) - - def check_symbols(self, symbol_map): - """Check that the paths of all included LODs are set and disjoint.""" - - error_found = False - - # Check that all included LODs have their base paths set, and - # collect the paths into a list: - paths = [] - for lod in symbol_map.itervalues(): - if isinstance(lod, LineOfDevelopment): - if lod.base_path is None: - Log().error('%s: No path was set for %r\n' % (error_prefix, lod,)) - error_found = True - else: - paths.append(lod.base_path) - - # Check that the SVN paths of all LODS are disjoint: - try: - verify_paths_disjoint(*paths) - except PathsNotDisjointException, e: - Log().error(str(e)) - error_found = True - - if error_found: - raise FatalException( - 'Please fix the above errors and restart CollateSymbolsPass' - ) - - def setup(self, svn_rev_count): - self._symbolings_reader = SymbolingsReader() - self._mirror.open() - self._delegates = [] - Ctx().revision_reader.start() - self.add_delegate(StdoutDelegate(svn_rev_count)) - - def _get_author(self, svn_commit): - author = svn_commit.get_author() - name = self.author_transforms.get(author, author) - return name - - def _get_revprops(self, svn_commit): - """Return the Subversion revprops for this SVNCommit.""" - - return { - 'svn:author' : self._get_author(svn_commit), - 'svn:log' : svn_commit.get_log_msg(), - 'svn:date' : format_date(svn_commit.date), - } - - def start_commit(self, revnum, revprops): - """Start a new commit.""" - - self._mirror.start_commit(revnum) - self._invoke_delegates('start_commit', revnum, revprops) - - def end_commit(self): - """Called at the end of each commit. - - This method copies the newly created nodes to the on-disk nodes - db.""" - - self._mirror.end_commit() - self._invoke_delegates('end_commit') - - def delete_lod(self, lod): - """Delete the main path for LOD from the tree. - - The path must currently exist. Silently refuse to delete trunk - paths.""" - - if isinstance(lod, Trunk): - # Never delete a Trunk path. - return - - self._mirror.get_current_lod_directory(lod).delete() - self._invoke_delegates('delete_lod', lod) - - def delete_path(self, cvs_path, lod, should_prune=False): - """Delete CVS_PATH from LOD.""" - - if cvs_path.parent_directory is None: - self.delete_lod(lod) - return - - parent_node = self._mirror.get_current_path( - cvs_path.parent_directory, lod - ) - del parent_node[cvs_path] - self._invoke_delegates('delete_path', lod, cvs_path) - - if should_prune: - while parent_node is not None and len(parent_node) == 0: - # A drawback of this code is that we issue a delete for each - # path and not just a single delete for the topmost directory - # pruned. - node = parent_node - cvs_path = node.cvs_path - if cvs_path.parent_directory is None: - parent_node = None - self.delete_lod(lod) - else: - parent_node = node.parent_mirror_dir - node.delete() - self._invoke_delegates('delete_path', lod, cvs_path) - - def initialize_project(self, project): - """Create the basic structure for PROJECT.""" - - self._invoke_delegates('initialize_project', project) - - # Don't invoke delegates. - self._mirror.add_lod(project.get_trunk()) - - def change_path(self, cvs_rev): - """Register a change in self._youngest for the CVS_REV's svn_path.""" - - # We do not have to update the nodes because our mirror is only - # concerned with the presence or absence of paths, and a file - # content change does not cause any path changes. - self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False)) - - def _mkdir_p(self, cvs_directory, lod): - """Make sure that CVS_DIRECTORY exists in LOD. - - If not, create it, calling delegates. Return the node for - CVS_DIRECTORY.""" - - try: - node = self._mirror.get_current_lod_directory(lod) - except KeyError: - node = self._mirror.add_lod(lod) - self._invoke_delegates('initialize_lod', lod) - - for sub_path in cvs_directory.get_ancestry()[1:]: - try: - node = node[sub_path] - except KeyError: - node = node.mkdir(sub_path) - self._invoke_delegates('mkdir', lod, sub_path) - if node is None: - raise self.ExpectedDirectoryError( - 'File found at \'%s\' where directory was expected.' % (sub_path,) - ) - - return node - - def add_path(self, cvs_rev): - """Add the CVS_REV's svn_path to the repository mirror. - - Create any missing intermediate paths.""" - - cvs_file = cvs_rev.cvs_file - parent_path = cvs_file.parent_directory - lod = cvs_rev.lod - parent_node = self._mkdir_p(parent_path, lod) - parent_node.add_file(cvs_file) - self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True)) - - def copy_lod(self, src_lod, dest_lod, src_revnum): - """Copy all of SRC_LOD at SRC_REVNUM to DST_LOD. - - In the youngest revision of the repository, the destination LOD - *must not* already exist. - - Return the new node at DEST_LOD. Note that this node is not - necessarily writable, though its parent node necessarily is.""" - - node = self._mirror.copy_lod(src_lod, dest_lod, src_revnum) - self._invoke_delegates('copy_lod', src_lod, dest_lod, src_revnum) - return node - - def copy_path( - self, cvs_path, src_lod, dest_lod, src_revnum, create_parent=False - ): - """Copy CVS_PATH from SRC_LOD at SRC_REVNUM to DST_LOD. - - In the youngest revision of the repository, the destination's - parent *must* exist unless CREATE_PARENT is specified. But the - destination itself *must not* exist. - - Return the new node at (CVS_PATH, DEST_LOD), as a - CurrentMirrorDirectory.""" - - if cvs_path.parent_directory is None: - return self.copy_lod(src_lod, dest_lod, src_revnum) - - # Get the node of our source, or None if it is a file: - src_node = self._mirror.get_old_path(cvs_path, src_lod, src_revnum) - - # Get the parent path of the destination: - if create_parent: - dest_parent_node = self._mkdir_p(cvs_path.parent_directory, dest_lod) - else: - try: - dest_parent_node = self._mirror.get_current_path( - cvs_path.parent_directory, dest_lod - ) - except KeyError: - raise self.ParentMissingError( - 'Attempt to add path \'%s\' to repository mirror, ' - 'but its parent directory doesn\'t exist in the mirror.' - % (dest_lod.get_path(cvs_path.cvs_path),) - ) - - if cvs_path in dest_parent_node: - raise PathExistsError( - 'Attempt to add path \'%s\' to repository mirror ' - 'when it already exists in the mirror.' - % (dest_lod.get_path(cvs_path.cvs_path),) - ) - - dest_parent_node[cvs_path] = src_node - self._invoke_delegates( - 'copy_path', - cvs_path, src_lod, dest_lod, src_revnum - ) - - return dest_parent_node[cvs_path] - - def fill_symbol(self, svn_symbol_commit, fill_source): - """Perform all copies for the CVSSymbols in SVN_SYMBOL_COMMIT. - - The symbolic name is guaranteed to exist in the Subversion - repository by the end of this call, even if there are no paths - under it.""" - - symbol = svn_symbol_commit.symbol - - try: - dest_node = self._mirror.get_current_lod_directory(symbol) - except KeyError: - self._fill_directory(symbol, None, fill_source, None) - else: - self._fill_directory(symbol, dest_node, fill_source, None) - - def _fill_directory(self, symbol, dest_node, fill_source, parent_source): - """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE. - - Use items from FILL_SOURCE, and recurse into the child items. - - Fill SYMBOL starting at the path FILL_SOURCE.cvs_path. DEST_NODE - is the node of this destination path, or None if the destination - does not yet exist. All directories above this path have already - been filled. FILL_SOURCE is a FillSource instance describing the - items within a subtree of the repository that still need to be - copied to the destination. - - PARENT_SOURCE is the SVNRevisionRange that was used to copy the - parent directory, if it was copied in this commit. We prefer to - copy from the same source as was used for the parent, since it - typically requires less touching-up. If PARENT_SOURCE is None, - then the parent directory was not copied in this commit, so no - revision is preferable to any other.""" - - copy_source = fill_source.compute_best_source(parent_source) - - # Figure out if we shall copy to this destination and delete any - # destination path that is in the way. - if dest_node is None: - # The destination does not exist at all, so it definitely has to - # be copied: - dest_node = self.copy_path( - fill_source.cvs_path, copy_source.source_lod, - symbol, copy_source.opening_revnum - ) - elif (parent_source is not None) and ( - copy_source.source_lod != parent_source.source_lod - or copy_source.opening_revnum != parent_source.opening_revnum - ): - # The parent path was copied from a different source than we - # need to use, so we have to delete the version that was copied - # with the parent then re-copy from the correct source: - self.delete_path(fill_source.cvs_path, symbol) - dest_node = self.copy_path( - fill_source.cvs_path, copy_source.source_lod, - symbol, copy_source.opening_revnum - ) - else: - copy_source = parent_source - - # The map {CVSPath : FillSource} of entries within this directory - # that need filling: - src_entries = fill_source.get_subsource_map() - - if copy_source is not None: - self._prune_extra_entries( - fill_source.cvs_path, symbol, dest_node, src_entries - ) - - return self._cleanup_filled_directory( - symbol, dest_node, src_entries, copy_source - ) - - def _cleanup_filled_directory( - self, symbol, dest_node, src_entries, copy_source - ): - """The directory at DEST_NODE has been filled and pruned; recurse. - - Recurse into the SRC_ENTRIES, in alphabetical order. If DEST_NODE - was copied in this revision, COPY_SOURCE should indicate where it - was copied from; otherwise, COPY_SOURCE should be None.""" - - cvs_paths = src_entries.keys() - cvs_paths.sort() - for cvs_path in cvs_paths: - if isinstance(cvs_path, CVSDirectory): - # Path is a CVSDirectory: - try: - dest_subnode = dest_node[cvs_path] - except KeyError: - # Path doesn't exist yet; it has to be created: - dest_node = self._fill_directory( - symbol, None, src_entries[cvs_path], None - ).parent_mirror_dir - else: - # Path already exists, but might have to be cleaned up: - dest_node = self._fill_directory( - symbol, dest_subnode, src_entries[cvs_path], copy_source - ).parent_mirror_dir - else: - # Path is a CVSFile: - self._fill_file( - symbol, cvs_path in dest_node, src_entries[cvs_path], copy_source - ) - # Reread dest_node since the call to _fill_file() might have - # made it writable: - dest_node = self._mirror.get_current_path( - dest_node.cvs_path, dest_node.lod - ) - - return dest_node - - def _fill_file(self, symbol, dest_existed, fill_source, parent_source): - """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE. - - Use items from FILL_SOURCE. - - Fill SYMBOL at path FILL_SOURCE.cvs_path. DEST_NODE is the node - of this destination path, or None if the destination does not yet - exist. All directories above this path have already been filled - as needed. FILL_SOURCE is a FillSource instance describing the - item that needs to be copied to the destination. - - PARENT_SOURCE is the source from which the parent directory was - copied, or None if the parent directory was not copied during this - commit. We prefer to copy from PARENT_SOURCE, since it typically - requires less touching-up. If PARENT_SOURCE is None, then the - parent directory was not copied in this commit, so no revision is - preferable to any other.""" - - copy_source = fill_source.compute_best_source(parent_source) - - # Figure out if we shall copy to this destination and delete any - # destination path that is in the way. - if not dest_existed: - # The destination does not exist at all, so it definitely has to - # be copied: - self.copy_path( - fill_source.cvs_path, copy_source.source_lod, - symbol, copy_source.opening_revnum - ) - elif (parent_source is not None) and ( - copy_source.source_lod != parent_source.source_lod - or copy_source.opening_revnum != parent_source.opening_revnum - ): - # The parent path was copied from a different source than we - # need to use, so we have to delete the version that was copied - # with the parent and then re-copy from the correct source: - self.delete_path(fill_source.cvs_path, symbol) - self.copy_path( - fill_source.cvs_path, copy_source.source_lod, - symbol, copy_source.opening_revnum - ) - - def _prune_extra_entries( - self, dest_cvs_path, symbol, dest_node, src_entries - ): - """Delete any entries in DEST_NODE that are not in SRC_ENTRIES.""" - - delete_list = [ - cvs_path - for cvs_path in dest_node - if cvs_path not in src_entries - ] - - # Sort the delete list so that the output is in a consistent - # order: - delete_list.sort() - for cvs_path in delete_list: - del dest_node[cvs_path] - self._invoke_delegates('delete_path', symbol, cvs_path) - - def add_delegate(self, delegate): - """Adds DELEGATE to self._delegates. - - For every delegate you add, whenever a repository action method is - performed, delegate's corresponding repository action method is - called. Multiple delegates will be called in the order that they - are added. See SVNRepositoryDelegate for more information.""" - - self._delegates.append(delegate) - - def _invoke_delegates(self, method, *args): - """Invoke a method on each delegate. - - Iterate through each of our delegates, in the order that they were - added, and call the delegate's method named METHOD with the - arguments in ARGS.""" - - for delegate in self._delegates: - getattr(delegate, method)(*args) - - def process_initial_project_commit(self, svn_commit): - self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit)) - - for project in svn_commit.projects: - self.initialize_project(project) - - self.end_commit() - - def process_primary_commit(self, svn_commit): - self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit)) - - # This actually commits CVSRevisions - if len(svn_commit.cvs_revs) > 1: - plural = "s" - else: - plural = "" - Log().verbose("Committing %d CVSRevision%s" - % (len(svn_commit.cvs_revs), plural)) - for cvs_rev in svn_commit.cvs_revs: - if isinstance(cvs_rev, CVSRevisionNoop): - pass - - elif isinstance(cvs_rev, CVSRevisionDelete): - self.delete_path(cvs_rev.cvs_file, cvs_rev.lod, Ctx().prune) - - elif isinstance(cvs_rev, CVSRevisionAdd): - self.add_path(cvs_rev) - - elif isinstance(cvs_rev, CVSRevisionChange): - self.change_path(cvs_rev) - - self.end_commit() - - def process_post_commit(self, svn_commit): - self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit)) - - Log().verbose( - 'Synchronizing default branch motivated by %d' - % (svn_commit.motivating_revnum,) - ) - - for cvs_rev in svn_commit.cvs_revs: - trunk = cvs_rev.cvs_file.project.get_trunk() - if isinstance(cvs_rev, CVSRevisionAdd): - # Copy from branch to trunk: - self.copy_path( - cvs_rev.cvs_file, cvs_rev.lod, trunk, - svn_commit.motivating_revnum, True - ) - elif isinstance(cvs_rev, CVSRevisionChange): - # Delete old version of the path on trunk... - self.delete_path(cvs_rev.cvs_file, trunk) - # ...and copy the new version over from branch: - self.copy_path( - cvs_rev.cvs_file, cvs_rev.lod, trunk, - svn_commit.motivating_revnum, True - ) - elif isinstance(cvs_rev, CVSRevisionDelete): - # Delete trunk path: - self.delete_path(cvs_rev.cvs_file, trunk) - elif isinstance(cvs_rev, CVSRevisionNoop): - # Do nothing - pass - else: - raise InternalError('Unexpected CVSRevision type: %s' % (cvs_rev,)) - - self.end_commit() - - def process_branch_commit(self, svn_commit): - self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit)) - Log().verbose('Filling branch:', svn_commit.symbol.name) - - # Get the set of sources for the symbolic name: - source_set = get_source_set( - svn_commit.symbol, - self._symbolings_reader.get_range_map(svn_commit), - ) - - self.fill_symbol(svn_commit, source_set) - - self.end_commit() - - def process_tag_commit(self, svn_commit): - self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit)) - Log().verbose('Filling tag:', svn_commit.symbol.name) - - # Get the set of sources for the symbolic name: - source_set = get_source_set( - svn_commit.symbol, - self._symbolings_reader.get_range_map(svn_commit), - ) - - self.fill_symbol(svn_commit, source_set) - - self.end_commit() - - def cleanup(self): - self._invoke_delegates('finish') - self._mirror.close() - self._mirror = None - Ctx().revision_reader.finish() - self._symbolings_reader.close() - del self._symbolings_reader - - -class DumpfileOutputOption(SVNOutputOption): - """Output the result of the conversion into a dumpfile.""" - - def __init__(self, dumpfile_path, author_transforms=None): - SVNOutputOption.__init__(self, author_transforms) - self.dumpfile_path = dumpfile_path - - def check(self): - pass - - def setup(self, svn_rev_count): - Log().quiet("Starting Subversion Dumpfile.") - SVNOutputOption.setup(self, svn_rev_count) - if not Ctx().dry_run: - self.add_delegate( - DumpfileDelegate(Ctx().revision_reader, self.dumpfile_path) - ) - - -class RepositoryOutputOption(SVNOutputOption): - """Output the result of the conversion into an SVN repository.""" - - def __init__(self, target, author_transforms=None): - SVNOutputOption.__init__(self, author_transforms) - self.target = target - - def check(self): - if not Ctx().dry_run: - # Verify that svnadmin can be executed. The 'help' subcommand - # should be harmless. - try: - check_command_runs([Ctx().svnadmin_executable, 'help'], 'svnadmin') - except CommandFailedException, e: - raise FatalError( - '%s\n' - 'svnadmin could not be executed. Please ensure that it is\n' - 'installed and/or use the --svnadmin option.' % (e,)) - - def setup(self, svn_rev_count): - Log().quiet("Starting Subversion Repository.") - SVNOutputOption.setup(self, svn_rev_count) - if not Ctx().dry_run: - self.add_delegate( - RepositoryDelegate(Ctx().revision_reader, self.target) - ) - - -class NewRepositoryOutputOption(RepositoryOutputOption): - """Output the result of the conversion into a new SVN repository.""" - - def __init__( - self, target, fs_type=None, bdb_txn_nosync=None, author_transforms=None, create_options=[] - ): - RepositoryOutputOption.__init__(self, target, author_transforms) - self.bdb_txn_nosync = bdb_txn_nosync - - # Determine the options to be passed to "svnadmin create": - if not fs_type: - # User didn't say what kind repository (bdb, fsfs, etc). We - # still pass --bdb-txn-nosync. It's a no-op if the default - # repository type doesn't support it, but we definitely want it - # if BDB is the default. - self.create_options = ['--bdb-txn-nosync'] - elif fs_type == 'bdb': - # User explicitly specified bdb. - # - # Since this is a BDB repository, pass --bdb-txn-nosync, because - # it gives us a 4-5x speed boost (if cvs2svn is creating the - # repository, cvs2svn should be the only program accessing the - # svn repository until cvs2svn is done). But we'll turn no-sync - # off in self.finish(), unless instructed otherwise. - self.create_options = ['--fs-type=bdb', '--bdb-txn-nosync'] - else: - # User specified something other than bdb. - self.create_options = ['--fs-type=%s' % fs_type] - - # Now append the user's explicitly-set create options: - self.create_options += create_options - - def check(self): - RepositoryOutputOption.check(self) - if not Ctx().dry_run and os.path.exists(self.target): - raise FatalError("the svn-repos-path '%s' exists.\n" - "Remove it, or pass '--existing-svnrepos'." - % self.target) - - def setup(self, svn_rev_count): - Log().normal("Creating new repository '%s'" % (self.target)) - if Ctx().dry_run: - # Do not actually create repository: - pass - else: - call_command([ - Ctx().svnadmin_executable, 'create', - ] + self.create_options + [ - self.target - ]) - - RepositoryOutputOption.setup(self, svn_rev_count) - - def cleanup(self): - RepositoryOutputOption.cleanup(self) - - # If this is a BDB repository, and we created the repository, and - # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC - # line in the DB_CONFIG file, because txn syncing should be on by - # default in BDB repositories. - # - # We determine if this is a BDB repository by looking for the - # DB_CONFIG file, which doesn't exist in FSFS, rather than by - # checking self.fs_type. That way this code will Do The Right - # Thing in all circumstances. - db_config = os.path.join(self.target, "db/DB_CONFIG") - if Ctx().dry_run: - # Do not change repository: - pass - elif not self.bdb_txn_nosync and os.path.exists(db_config): - no_sync = 'set_flags DB_TXN_NOSYNC\n' - - contents = open(db_config, 'r').readlines() - index = contents.index(no_sync) - contents[index] = '# ' + no_sync - open(db_config, 'w').writelines(contents) - - -class ExistingRepositoryOutputOption(RepositoryOutputOption): - """Output the result of the conversion into an existing SVN repository.""" - - def __init__(self, target, author_transforms=None): - RepositoryOutputOption.__init__(self, target, author_transforms) - - def check(self): - RepositoryOutputOption.check(self) - if not os.path.isdir(self.target): - raise FatalError("the svn-repos-path '%s' is not an " - "existing directory." % self.target) - - diff --git a/cvs2svn_lib/svn_repository_delegate.py b/cvs2svn_lib/svn_repository_delegate.py deleted file mode 100644 index 00c4a01..0000000 --- a/cvs2svn_lib/svn_repository_delegate.py +++ /dev/null @@ -1,121 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains the SVNRepositoryDelegate class.""" - - -class SVNRepositoryDelegate: - """Abstract superclass for any delegate to SVNOutputOption. - - Subclasses must implement all of the methods below. - - For each method, a subclass implements, in its own way, the - Subversion operation implied by the method's name. For example, for - the add_path method, the DumpfileDelegate would write out a - 'Node-add:' command to a Subversion dumpfile, the StdoutDelegate - would merely print that the path is being added to the repository, - and the RepositoryDelegate would actually cause the path to be added - to the Subversion repository that it is creating.""" - - def start_commit(self, revnum, revprops): - """An SVN commit is starting. - - Perform any actions needed to start an SVN commit with revision - number REVNUM and revision properties REVPROPS.""" - - raise NotImplementedError() - - def end_commit(self): - """An SVN commit is ending.""" - - raise NotImplementedError() - - def initialize_project(self, project): - """Initialize PROJECT. - - For Subversion, this means to create the trunk, branches, and tags - directories for PROJECT.""" - - raise NotImplementedError() - - def initialize_lod(self, lod): - """Initialize LOD with no contents. - - LOD is an instance of LineOfDevelopment. It is also possible for - an LOD to be created by copying from another LOD; such events are - indicated via the copy_lod() callback.""" - - raise NotImplementedError() - - def mkdir(self, lod, cvs_directory): - """Create CVS_DIRECTORY within LOD. - - LOD is a LineOfDevelopment; CVS_DIRECTORY is a CVSDirectory.""" - - raise NotImplementedError() - - def add_path(self, s_item): - """Add the path corresponding to S_ITEM to the repository. - - S_ITEM is an SVNCommitItem.""" - - raise NotImplementedError() - - def change_path(self, s_item): - """Change the path corresponding to S_ITEM in the repository. - - S_ITEM is an SVNCommitItem.""" - - raise NotImplementedError() - - def delete_lod(self, lod): - """Delete LOD from the repository. - - LOD is a LineOfDevelopment instance.""" - - raise NotImplementedError() - - def delete_path(self, lod, cvs_path): - """Delete CVS_PATH from LOD. - - LOD is a LineOfDevelopment; CVS_PATH is a CVSPath.""" - - raise NotImplementedError() - - def copy_lod(self, src_lod, dest_lod, src_revnum): - """Copy SRC_LOD in SRC_REVNUM to DEST_LOD. - - SRC_LOD and DEST_LOD are both LODs, and SRC_REVNUM is a subversion - revision number (int).""" - - raise NotImplementedError() - - def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum): - """Copy CVS_PATH in SRC_LOD@SRC_REVNUM to DEST_LOD. - - CVS_PATH is a CVSPath, SRC_LOD and DEST_LOD are LODs, and - SRC_REVNUM is a subversion revision number (int).""" - - raise NotImplementedError() - - def finish(self): - """All SVN revisions have been committed. - - Perform any necessary cleanup.""" - - raise NotImplementedError() - - diff --git a/cvs2svn_lib/svn_revision_range.py b/cvs2svn_lib/svn_revision_range.py deleted file mode 100644 index 04ba7fa..0000000 --- a/cvs2svn_lib/svn_revision_range.py +++ /dev/null @@ -1,171 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains the SVNRevisionRange class.""" - - -import bisect - -from cvs2svn_lib.common import SVN_INVALID_REVNUM - - -class SVNRevisionRange: - """The range of subversion revision numbers from which a path can be - copied. self.opening_revnum is the number of the earliest such - revision, and self.closing_revnum is one higher than the number of - the last such revision. If self.closing_revnum is None, then no - closings were registered.""" - - def __init__(self, source_lod, opening_revnum): - self.source_lod = source_lod - self.opening_revnum = opening_revnum - self.closing_revnum = None - - def add_closing(self, closing_revnum): - # When we have a non-trunk default branch, we may have multiple - # closings--only register the first closing we encounter. - if self.closing_revnum is None: - self.closing_revnum = closing_revnum - - def __contains__(self, revnum): - """Return True iff REVNUM is contained in the range.""" - - return ( - self.opening_revnum <= revnum \ - and (self.closing_revnum is None or revnum < self.closing_revnum) - ) - - def __str__(self): - if self.closing_revnum is None: - return '[%d:]' % (self.opening_revnum,) - else: - return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,) - - def __repr__(self): - return str(self) - - -class RevisionScores: - """Represent the scores for a range of revisions.""" - - def __init__(self, svn_revision_ranges): - """Initialize based on SVN_REVISION_RANGES. - - SVN_REVISION_RANGES is a list of SVNRevisionRange objects. - - The score of an svn source is defined to be the number of - SVNRevisionRanges on that LOD that include the revision. A score - thus indicates that copying the corresponding revision (or any - following revision up to the next revision in the list) of the - object in question would yield that many correct paths at or - underneath the object. There may be other paths underneath it - that are not correct and would need to be deleted or recopied; - those can only be detected by descending and examining their - scores. - - If SVN_REVISION_RANGES is empty, then all scores are undefined.""" - - deltas_map = {} - - for range in svn_revision_ranges: - source_lod = range.source_lod - try: - deltas = deltas_map[source_lod] - except: - deltas = [] - deltas_map[source_lod] = deltas - deltas.append((range.opening_revnum, +1)) - if range.closing_revnum is not None: - deltas.append((range.closing_revnum, -1)) - - # A map: - # - # {SOURCE_LOD : [(REV1 SCORE1), (REV2 SCORE2), (REV3 SCORE3), ...]} - # - # where the tuples are sorted by revision number and the revision - # numbers are distinct. Score is the number of correct paths that - # would result from using the specified SOURCE_LOD and revision - # number (or any other revision preceding the next revision - # listed) as a source. For example, the score of any revision REV - # in the range REV2 <= REV < REV3 is equal to SCORE2. - self._scores_map = {} - - for (source_lod,deltas) in deltas_map.items(): - # Sort by revision number: - deltas.sort() - - # Initialize output list with zeroth element of deltas. This - # element must exist, because it was verified that - # svn_revision_ranges (and therefore openings) is not empty. - scores = [ deltas[0] ] - total = deltas[0][1] - for (rev, change) in deltas[1:]: - total += change - if rev == scores[-1][0]: - # Same revision as last entry; modify last entry: - scores[-1] = (rev, total) - else: - # Previously-unseen revision; create new entry: - scores.append((rev, total)) - self._scores_map[source_lod] = scores - - def get_score(self, range): - """Return the score for RANGE's opening revision. - - If RANGE doesn't appear explicitly in self.scores, use the score - of the higest revision preceding RANGE. If there are no preceding - revisions, then the score for RANGE is unknown; in this case, - return -1.""" - - try: - scores = self._scores_map[range.source_lod] - except KeyError: - return -1 - - # Remember, according to the tuple sorting rules, - # - # (revnum, anything,) < (revnum+1,) < (revnum+1, anything,) - predecessor_index = bisect.bisect_right( - scores, (range.opening_revnum + 1,) - ) - 1 - - if predecessor_index < 0: - return -1 - - return scores[predecessor_index][1] - - def get_best_revnum(self): - """Find the revnum with the highest score. - - Return (revnum, score) for the revnum with the highest score. If - the highest score is shared by multiple revisions, select the - oldest revision.""" - - best_source_lod = None - best_revnum = SVN_INVALID_REVNUM - best_score = 0 - - source_lods = self._scores_map.keys() - source_lods.sort() - for source_lod in source_lods: - for revnum, score in self._scores_map[source_lod]: - if score > best_score: - best_source_lod = source_lod - best_score = score - best_revnum = revnum - return best_source_lod, best_revnum, best_score - - diff --git a/cvs2svn_lib/svn_run_options.py b/cvs2svn_lib/svn_run_options.py deleted file mode 100644 index e757730..0000000 --- a/cvs2svn_lib/svn_run_options.py +++ /dev/null @@ -1,543 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module manages cvs2svn run options.""" - - -import sys -import optparse -import datetime -import codecs - -from cvs2svn_lib.version import VERSION -from cvs2svn_lib import config -from cvs2svn_lib.common import warning_prefix -from cvs2svn_lib.common import error_prefix -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import normalize_svn_path -from cvs2svn_lib.log import Log -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.run_options import not_both -from cvs2svn_lib.run_options import RunOptions -from cvs2svn_lib.run_options import ContextOption -from cvs2svn_lib.run_options import IncompatibleOption -from cvs2svn_lib.run_options import authors -from cvs2svn_lib.man_writer import ManWriter -from cvs2svn_lib.project import Project -from cvs2svn_lib.svn_output_option import DumpfileOutputOption -from cvs2svn_lib.svn_output_option import ExistingRepositoryOutputOption -from cvs2svn_lib.svn_output_option import NewRepositoryOutputOption -from cvs2svn_lib.revision_manager import NullRevisionRecorder -from cvs2svn_lib.revision_manager import NullRevisionExcluder -from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader -from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader -from cvs2svn_lib.checkout_internal import InternalRevisionRecorder -from cvs2svn_lib.checkout_internal import InternalRevisionExcluder -from cvs2svn_lib.checkout_internal import InternalRevisionReader -from cvs2svn_lib.symbol_strategy import TrunkPathRule -from cvs2svn_lib.symbol_strategy import BranchesPathRule -from cvs2svn_lib.symbol_strategy import TagsPathRule - - -short_desc = 'convert a cvs repository into a subversion repository' - -synopsis = """\ -.B cvs2svn -[\\fIOPTION\\fR]... \\fIOUTPUT-OPTION CVS-REPOS-PATH\\fR -.br -.B cvs2svn -[\\fIOPTION\\fR]... \\fI--options=PATH\\fR -""" - -long_desc = """\ -Create a new Subversion repository based on the version history stored in a -CVS repository. Each CVS commit will be mirrored in the Subversion -repository, including such information as date of commit and id of the -committer. -.P -\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS -repository that you want to convert. It is not possible to convert a -CVS repository to which you only have remote access; see the FAQ for -more information. This path doesn't have to be the top level -directory of a CVS repository; it can point at a project within a -repository, in which case only that project will be converted. This -path or one of its parent directories has to contain a subdirectory -called CVSROOT (though the CVSROOT directory can be empty). -.P -Multiple CVS repositories can be converted into a single Subversion -repository in a single run of cvs2svn, but only by using an -\\fB--options\\fR file. -""" - -files = """\ -A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by -\\fB--tmpdir\\fR) is used as scratch space for temporary data files. -""" - -see_also = [ - ('cvs', '1'), - ('svn', '1'), - ('svnadmin', '1'), - ] - - -class SVNRunOptions(RunOptions): - def _get_output_options_group(self): - group = RunOptions._get_output_options_group(self) - - group.add_option(IncompatibleOption( - '--svnrepos', '-s', type='string', - action='store', - help='path where SVN repos should be created', - man_help=( - 'Write the output of the conversion into a Subversion repository ' - 'located at \\fIpath\\fR. This option causes a new Subversion ' - 'repository to be created at \\fIpath\\fR unless the ' - '\\fB--existing-svnrepos\\fR option is also used.' - ), - metavar='PATH', - )) - self.parser.set_default('existing_svnrepos', False) - group.add_option(IncompatibleOption( - '--existing-svnrepos', - action='store_true', - help='load into existing SVN repository (for use with --svnrepos)', - man_help=( - 'Load the converted CVS repository into an existing Subversion ' - 'repository, instead of creating a new repository. (This option ' - 'should be used in combination with ' - '\\fB-s\\fR/\\fB--svnrepos\\fR.) The repository must either be ' - 'empty or contain no paths that overlap with those that will ' - 'result from the conversion. Please note that you need write ' - 'permission for the repository files.' - ), - )) - group.add_option(IncompatibleOption( - '--fs-type', type='string', - action='store', - help=( - 'pass --fs-type=TYPE to "svnadmin create" (for use with ' - '--svnrepos)' - ), - man_help=( - 'Pass \\fI--fs-type\\fR=\\fItype\\fR to "svnadmin create" when ' - 'creating a new repository.' - ), - metavar='TYPE', - )) - self.parser.set_default('bdb_txn_nosync', False) - group.add_option(IncompatibleOption( - '--bdb-txn-nosync', - action='store_true', - help=( - 'pass --bdb-txn-nosync to "svnadmin create" (for use with ' - '--svnrepos)' - ), - man_help=( - 'Pass \\fI--bdb-txn-nosync\\fR to "svnadmin create" when ' - 'creating a new BDB-style Subversion repository.' - ), - )) - self.parser.set_default('create_options', []) - group.add_option(IncompatibleOption( - '--create-option', type='string', - action='append', dest='create_options', - help='pass OPT to "svnadmin create" (for use with --svnrepos)', - man_help=( - 'Pass \\fIopt\\fR to "svnadmin create" when creating a new ' - 'Subversion repository (can be specified multiple times to ' - 'pass multiple options).' - ), - metavar='OPT', - )) - group.add_option(IncompatibleOption( - '--dumpfile', type='string', - action='store', - help='just produce a dumpfile; don\'t commit to a repos', - man_help=( - 'Just produce a dumpfile; don\'t commit to an SVN repository. ' - 'Write the dumpfile to \\fIpath\\fR.' - ), - metavar='PATH', - )) - - group.add_option(ContextOption( - '--dry-run', - action='store_true', - help=( - 'do not create a repository or a dumpfile; just print what ' - 'would happen.' - ), - man_help=( - 'Do not create a repository or a dumpfile; just print the ' - 'details of what cvs2svn would do if it were really converting ' - 'your repository.' - ), - )) - - # Deprecated options: - self.parser.set_default('dump_only', False) - group.add_option(IncompatibleOption( - '--dump-only', - action='callback', callback=self.callback_dump_only, - help=optparse.SUPPRESS_HELP, - man_help=optparse.SUPPRESS_HELP, - )) - group.add_option(IncompatibleOption( - '--create', - action='callback', callback=self.callback_create, - help=optparse.SUPPRESS_HELP, - man_help=optparse.SUPPRESS_HELP, - )) - - return group - - def _get_conversion_options_group(self): - group = RunOptions._get_conversion_options_group(self) - - self.parser.set_default('trunk_base', config.DEFAULT_TRUNK_BASE) - group.add_option(IncompatibleOption( - '--trunk', type='string', - action='store', dest='trunk_base', - help=( - 'path for trunk (default: %s)' - % (config.DEFAULT_TRUNK_BASE,) - ), - man_help=( - 'Set the top-level path to use for trunk in the Subversion ' - 'repository. The default is \\fI%s\\fR.' - % (config.DEFAULT_TRUNK_BASE,) - ), - metavar='PATH', - )) - self.parser.set_default('branches_base', config.DEFAULT_BRANCHES_BASE) - group.add_option(IncompatibleOption( - '--branches', type='string', - action='store', dest='branches_base', - help=( - 'path for branches (default: %s)' - % (config.DEFAULT_BRANCHES_BASE,) - ), - man_help=( - 'Set the top-level path to use for branches in the Subversion ' - 'repository. The default is \\fI%s\\fR.' - % (config.DEFAULT_BRANCHES_BASE,) - ), - metavar='PATH', - )) - self.parser.set_default('tags_base', config.DEFAULT_TAGS_BASE) - group.add_option(IncompatibleOption( - '--tags', type='string', - action='store', dest='tags_base', - help=( - 'path for tags (default: %s)' - % (config.DEFAULT_TAGS_BASE,) - ), - man_help=( - 'Set the top-level path to use for tags in the Subversion ' - 'repository. The default is \\fI%s\\fR.' - % (config.DEFAULT_TAGS_BASE,) - ), - metavar='PATH', - )) - group.add_option(ContextOption( - '--no-prune', - action='store_false', dest='prune', - help='don\'t prune empty directories', - man_help=( - 'When all files are deleted from a directory in the Subversion ' - 'repository, don\'t delete the empty directory (the default is ' - 'to delete any empty directories).' - ), - )) - group.add_option(ContextOption( - '--no-cross-branch-commits', - action='store_false', dest='cross_branch_commits', - help='prevent the creation of cross-branch commits', - man_help=( - 'Prevent the creation of commits that affect files on multiple ' - 'branches at once.' - ), - )) - - return group - - def _get_extraction_options_group(self): - group = RunOptions._get_extraction_options_group(self) - - self.parser.set_default('use_internal_co', False) - group.add_option(IncompatibleOption( - '--use-internal-co', - action='store_true', - help=( - 'use internal code to extract revision contents ' - '(fastest but disk space intensive) (default)' - ), - man_help=( - 'Use internal code to extract revision contents. This ' - 'is up to 50% faster than using \\fB--use-rcs\\fR, but needs ' - 'a lot of disk space: roughly the size of your CVS repository ' - 'plus the peak size of a complete checkout of the repository ' - 'with all branches that existed and still had commits pending ' - 'at a given time. This option is the default.' - ), - )) - self.parser.set_default('use_cvs', False) - group.add_option(IncompatibleOption( - '--use-cvs', - action='store_true', - help=( - 'use CVS to extract revision contents (slower than ' - '--use-internal-co or --use-rcs)' - ), - man_help=( - 'Use CVS to extract revision contents. This option is slower ' - 'than \\fB--use-internal-co\\fR or \\fB--use-rcs\\fR.' - ), - )) - self.parser.set_default('use_rcs', False) - group.add_option(IncompatibleOption( - '--use-rcs', - action='store_true', - help=( - 'use RCS to extract revision contents (faster than ' - '--use-cvs but fails in some cases)' - ), - man_help=( - 'Use RCS \'co\' to extract revision contents. This option is ' - 'faster than \\fB--use-cvs\\fR but fails in some cases.' - ), - )) - - return group - - def _get_environment_options_group(self): - group = RunOptions._get_environment_options_group(self) - - group.add_option(ContextOption( - '--svnadmin', type='string', - action='store', dest='svnadmin_executable', - help='path to the "svnadmin" program', - man_help=( - 'Path to the \\fIsvnadmin\\fR program. (\\fIsvnadmin\\fR is ' - 'needed when the \\fB-s\\fR/\\fB--svnrepos\\fR output option is ' - 'used.)' - ), - metavar='PATH', - )) - - return group - - def callback_dump_only(self, option, opt_str, value, parser): - parser.values.dump_only = True - Log().error( - warning_prefix + - ': The --dump-only option is deprecated (it is implied ' - 'by --dumpfile).\n' - ) - - def callback_create(self, option, opt_str, value, parser): - Log().error( - warning_prefix + - ': The behaviour produced by the --create option is now the ' - 'default;\n' - 'passing the option is deprecated.\n' - ) - - def callback_manpage(self, option, opt_str, value, parser): - f = codecs.getwriter('utf_8')(sys.stdout) - ManWriter( - parser, - section='1', - date=datetime.date.today(), - source='Version %s' % (VERSION,), - manual='User Commands', - short_desc=short_desc, - synopsis=synopsis, - long_desc=long_desc, - files=files, - authors=authors, - see_also=see_also, - ).write_manpage(f) - sys.exit(0) - - def process_extraction_options(self): - """Process options related to extracting data from the CVS repository.""" - - ctx = Ctx() - options = self.options - - not_both(options.use_rcs, '--use-rcs', - options.use_cvs, '--use-cvs') - - not_both(options.use_rcs, '--use-rcs', - options.use_internal_co, '--use-internal-co') - - not_both(options.use_cvs, '--use-cvs', - options.use_internal_co, '--use-internal-co') - - if options.use_rcs: - ctx.revision_recorder = NullRevisionRecorder() - ctx.revision_excluder = NullRevisionExcluder() - ctx.revision_reader = RCSRevisionReader(options.co_executable) - elif options.use_cvs: - ctx.revision_recorder = NullRevisionRecorder() - ctx.revision_excluder = NullRevisionExcluder() - ctx.revision_reader = CVSRevisionReader(options.cvs_executable) - else: - # --use-internal-co is the default: - ctx.revision_recorder = InternalRevisionRecorder(compress=True) - ctx.revision_excluder = InternalRevisionExcluder() - ctx.revision_reader = InternalRevisionReader(compress=True) - - def process_output_options(self): - """Process the options related to SVN output.""" - - ctx = Ctx() - options = self.options - - if options.dump_only and not options.dumpfile: - raise FatalError("'--dump-only' requires '--dumpfile' to be specified.") - - if not options.svnrepos and not options.dumpfile and not ctx.dry_run: - raise FatalError("must pass one of '-s' or '--dumpfile'.") - - not_both(options.svnrepos, '-s', - options.dumpfile, '--dumpfile') - - not_both(options.dumpfile, '--dumpfile', - options.existing_svnrepos, '--existing-svnrepos') - - not_both(options.bdb_txn_nosync, '--bdb-txn-nosync', - options.existing_svnrepos, '--existing-svnrepos') - - not_both(options.dumpfile, '--dumpfile', - options.bdb_txn_nosync, '--bdb-txn-nosync') - - not_both(options.fs_type, '--fs-type', - options.existing_svnrepos, '--existing-svnrepos') - - if ( - options.fs_type - and options.fs_type != 'bdb' - and options.bdb_txn_nosync - ): - raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s." - % options.fs_type) - - if options.svnrepos: - if options.existing_svnrepos: - ctx.output_option = ExistingRepositoryOutputOption(options.svnrepos) - else: - ctx.output_option = NewRepositoryOutputOption( - options.svnrepos, - fs_type=options.fs_type, bdb_txn_nosync=options.bdb_txn_nosync, - create_options=options.create_options) - else: - ctx.output_option = DumpfileOutputOption(options.dumpfile) - - def add_project( - self, - project_cvs_repos_path, - trunk_path=None, branches_path=None, tags_path=None, - initial_directories=[], - symbol_transforms=None, - symbol_strategy_rules=[], - ): - """Add a project to be converted. - - Most arguments are passed straight through to the Project - constructor. SYMBOL_STRATEGY_RULES is an iterable of - SymbolStrategyRules that will be applied to symbols in this - project.""" - - if trunk_path is not None: - trunk_path = normalize_svn_path(trunk_path, allow_empty=True) - if branches_path is not None: - branches_path = normalize_svn_path(branches_path, allow_empty=False) - if tags_path is not None: - tags_path = normalize_svn_path(tags_path, allow_empty=False) - - initial_directories = [ - path - for path in [trunk_path, branches_path, tags_path] - if path - ] + [ - normalize_svn_path(path) - for path in initial_directories - ] - - symbol_strategy_rules = list(symbol_strategy_rules) - - # Add rules to set the SVN paths for LODs depending on whether - # they are the trunk, tags, or branches: - if trunk_path is not None: - symbol_strategy_rules.append(TrunkPathRule(trunk_path)) - if branches_path is not None: - symbol_strategy_rules.append(BranchesPathRule(branches_path)) - if tags_path is not None: - symbol_strategy_rules.append(TagsPathRule(tags_path)) - - id = len(self.projects) - project = Project( - id, - project_cvs_repos_path, - initial_directories=initial_directories, - symbol_transforms=symbol_transforms, - ) - - self.projects.append(project) - self.project_symbol_strategy_rules.append(symbol_strategy_rules) - - def clear_projects(self): - """Clear the list of projects to be converted. - - This method is for the convenience of options files, which may - want to import one another.""" - - del self.projects[:] - del self.project_symbol_strategy_rules[:] - - def process_options(self): - # Consistency check for options and arguments. - if len(self.args) == 0: - self.usage() - sys.exit(1) - - if len(self.args) > 1: - Log().error(error_prefix + ": must pass only one CVS repository.\n") - self.usage() - sys.exit(1) - - cvsroot = self.args[0] - - self.process_extraction_options() - self.process_output_options() - self.process_symbol_strategy_options() - self.process_property_setter_options() - - # Create the default project (using ctx.trunk, ctx.branches, and - # ctx.tags): - self.add_project( - cvsroot, - trunk_path=self.options.trunk_base, - branches_path=self.options.branches_base, - tags_path=self.options.tags_base, - symbol_transforms=self.options.symbol_transforms, - symbol_strategy_rules=self.options.symbol_strategy_rules, - ) - - diff --git a/cvs2svn_lib/symbol.py b/cvs2svn_lib/symbol.py deleted file mode 100644 index e3a6b35..0000000 --- a/cvs2svn_lib/symbol.py +++ /dev/null @@ -1,246 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes that represent trunk, branches, and tags. - -The classes in this module represent several concepts related to -symbols and lines of development in the abstract; that is, not within -a particular file, but across all files in a project. - -The classes in this module are organized into the following class -hierarchy: - -AbstractSymbol - | - +--LineOfDevelopment - | | - | +--Trunk - | | - | +--IncludedSymbol (also inherits from TypedSymbol) - | | - | +--Branch - | | - | +--Tag - | - +--Symbol - | - +--TypedSymbol - | - +--IncludedSymbol (also inherits from LineOfDevelopment) - | | - | +--Branch - | | - | +--Tag - | - +--ExcludedSymbol - -Please note the use of multiple inheritance. - -All AbstractSymbols contain an id that is globally unique across all -AbstractSymbols. Moreover, the id of an AbstractSymbol remains the -same even if the symbol is mutated (as described below), and two -AbstractSymbols are considered equal iff their ids are the same, even -if the two instances have different types. Symbols in different -projects always have different ids and are therefore always distinct. -(Indeed, this is pretty much the defining characteristic of a -project.) Even if, for example, two projects each have branches with -the same name, the Symbols representing the branches are distinct and -have distinct ids. (This is important to avoid having to rewrite -databases with new symbol ids in CollateSymbolsPass.) - -AbstractSymbols are all initially created in CollectRevsPass as either -Trunk or Symbol instances. A Symbol instance is essentially an -undifferentiated Symbol. - -In CollateSymbolsPass, it is decided which symbols will be converted -as branches, which as tags, and which excluded altogether. At the -beginning of this pass, the symbols are all represented by instances -of the non-specific Symbol class. During CollateSymbolsPass, each -Symbol instance is replaced by an instance of Branch, Tag, or -ExcludedSymbol with the same id. (Trunk instances are left -unchanged.) At the end of CollateSymbolsPass, all ExcludedSymbols are -discarded and processing continues with only Trunk, Branch, and Tag -instances. These three classes inherit from LineOfDevelopment; -therefore, in later passes the term LineOfDevelopment (abbreviated to -LOD) is used to refer to such objects.""" - - -from cvs2svn_lib.context import Ctx -from cvs2svn_lib.common import path_join - - -class AbstractSymbol: - """Base class for all other classes in this file.""" - - def __init__(self, id, project): - self.id = id - self.project = project - - def __hash__(self): - return self.id - - def __eq__(self, other): - return self.id == other.id - - -class LineOfDevelopment(AbstractSymbol): - """Base class for Trunk, Branch, and Tag. - - This is basically the abstraction for what will be a root tree in - the Subversion repository.""" - - def __init__(self, id, project): - AbstractSymbol.__init__(self, id, project) - self.base_path = None - - def get_path(self, *components): - """Return the svn path for this LineOfDevelopment.""" - - return path_join(self.base_path, *components) - - -class Trunk(LineOfDevelopment): - """Represent the main line of development.""" - - def __getstate__(self): - return (self.id, self.project.id, self.base_path,) - - def __setstate__(self, state): - (self.id, project_id, self.base_path,) = state - self.project = Ctx()._projects[project_id] - - def __cmp__(self, other): - if isinstance(other, Trunk): - return cmp(self.project, other.project) - elif isinstance(other, Symbol): - # Allow Trunk to compare less than Symbols: - return -1 - else: - raise NotImplementedError() - - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return 'Trunk' - - def __repr__(self): - return '%s<%x>' % (self, self.id,) - - -class Symbol(AbstractSymbol): - """Represents a symbol within one project in the CVS repository. - - Instance of the Symbol class itself are used to represent symbols - from the CVS repository. CVS, of course, distinguishes between - normal tags and branch tags, but we allow symbol types to be changed - in CollateSymbolsPass. Therefore, we store all CVS symbols as - Symbol instances at the beginning of the conversion. - - In CollateSymbolsPass, Symbols are replaced by Branches, Tags, and - ExcludedSymbols (the latter being discarded at the end of that - pass).""" - - def __init__(self, id, project, name, preferred_parent_id=None): - AbstractSymbol.__init__(self, id, project) - self.name = name - - # If this symbol has a preferred parent, this member is the id of - # the LineOfDevelopment instance representing it. If the symbol - # never appeared in a CVSTag or CVSBranch (for example, because - # all of the branches on this LOD have been detached from the - # dependency tree), then this field is set to None. This field is - # set during FilterSymbolsPass. - self.preferred_parent_id = preferred_parent_id - - def __getstate__(self): - return (self.id, self.project.id, self.name, self.preferred_parent_id,) - - def __setstate__(self, state): - (self.id, project_id, self.name, self.preferred_parent_id,) = state - self.project = Ctx()._projects[project_id] - - def __cmp__(self, other): - if isinstance(other, Symbol): - return cmp(self.project, other.project) \ - or cmp(self.name, other.name) \ - or cmp(self.id, other.id) - elif isinstance(other, Trunk): - # Allow Symbols to compare greater than Trunk: - return +1 - else: - raise NotImplementedError() - - def __str__(self): - return self.name - - def __repr__(self): - return '%s<%x>' % (self, self.id,) - - -class TypedSymbol(Symbol): - """A Symbol whose type (branch, tag, or excluded) has been decided.""" - - def __init__(self, symbol): - Symbol.__init__( - self, symbol.id, symbol.project, symbol.name, - symbol.preferred_parent_id, - ) - - -class IncludedSymbol(TypedSymbol, LineOfDevelopment): - """A TypedSymbol that will be included in the conversion.""" - - def __init__(self, symbol): - TypedSymbol.__init__(self, symbol) - # We can't call the LineOfDevelopment constructor, so initialize - # its extra member explicitly: - try: - # If the old symbol had a base_path set, then use it: - self.base_path = symbol.base_path - except AttributeError: - self.base_path = None - - def __getstate__(self): - return (TypedSymbol.__getstate__(self), self.base_path,) - - def __setstate__(self, state): - (super_state, self.base_path,) = state - TypedSymbol.__setstate__(self, super_state) - - -class Branch(IncludedSymbol): - """An object that describes a CVS branch.""" - - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return 'Branch(%r)' % (self.name,) - - -class Tag(IncludedSymbol): - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return 'Tag(%r)' % (self.name,) - - -class ExcludedSymbol(TypedSymbol): - def __str__(self): - """For convenience only. The format is subject to change at any time.""" - - return 'ExcludedSymbol(%r)' % (self.name,) - - diff --git a/cvs2svn_lib/symbol_database.py b/cvs2svn_lib/symbol_database.py deleted file mode 100644 index 824f97b..0000000 --- a/cvs2svn_lib/symbol_database.py +++ /dev/null @@ -1,68 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains the SymbolDatabase class.""" - - -import cPickle - -from cvs2svn_lib import config -from cvs2svn_lib.artifact_manager import artifact_manager - - -class SymbolDatabase: - """Read-only access to symbol database. - - This class allows iteration and lookups id -> symbol, where symbol - is a TypedSymbol instance. The whole database is read into memory - upon construction.""" - - def __init__(self): - # A map { id : TypedSymbol } - self._symbols = {} - - f = open(artifact_manager.get_temp_file(config.SYMBOL_DB), 'rb') - symbols = cPickle.load(f) - f.close() - for symbol in symbols: - self._symbols[symbol.id] = symbol - - def get_symbol(self, id): - """Return the symbol instance with id ID. - - Raise KeyError if the symbol is not known.""" - - return self._symbols[id] - - def __iter__(self): - """Iterate over the Symbol instances within this database.""" - - return self._symbols.itervalues() - - def close(self): - self._symbols = None - - -def create_symbol_database(symbols): - """Create and fill a symbol database. - - Record each symbol that is listed in SYMBOLS, which is an iterable - containing Trunk and TypedSymbol objects.""" - - f = open(artifact_manager.get_temp_file(config.SYMBOL_DB), 'wb') - cPickle.dump(symbols, f, -1) - f.close() - diff --git a/cvs2svn_lib/symbol_statistics.py b/cvs2svn_lib/symbol_statistics.py deleted file mode 100644 index 0d35a50..0000000 --- a/cvs2svn_lib/symbol_statistics.py +++ /dev/null @@ -1,521 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module gathers and processes statistics about lines of development.""" - -import cPickle - -from cvs2svn_lib import config -from cvs2svn_lib.common import error_prefix -from cvs2svn_lib.common import FatalException -from cvs2svn_lib.log import Log -from cvs2svn_lib.artifact_manager import artifact_manager -from cvs2svn_lib.symbol import Trunk -from cvs2svn_lib.symbol import IncludedSymbol -from cvs2svn_lib.symbol import Branch -from cvs2svn_lib.symbol import Tag -from cvs2svn_lib.symbol import ExcludedSymbol - - -class SymbolPlanError(FatalException): - pass - - -class SymbolPlanException(SymbolPlanError): - def __init__(self, stats, symbol, msg): - self.stats = stats - self.symbol = symbol - SymbolPlanError.__init__( - self, - 'Cannot convert the following symbol to %s: %s\n %s' - % (symbol, msg, self.stats,) - ) - - -class IndeterminateSymbolException(SymbolPlanException): - def __init__(self, stats, symbol): - SymbolPlanException.__init__(self, stats, symbol, 'Indeterminate type') - - -class _Stats: - """A summary of information about a symbol (tag or branch). - - Members: - - lod -- the LineOfDevelopment instance of the lod being described - - tag_create_count -- the number of files in which this lod appears - as a tag - - branch_create_count -- the number of files in which this lod - appears as a branch - - branch_commit_count -- the number of files in which there were - commits on this lod - - trivial_import_count -- the number of files in which this branch - was purely a non-trunk default branch containing exactly one - revision. - - pure_ntdb_count -- the number of files in which this branch was - purely a non-trunk default branch (consisting only of - non-trunk default branch revisions). - - branch_blockers -- a set of Symbol instances for any symbols that - sprout from a branch with this name. - - possible_parents -- a map {LineOfDevelopment : count} indicating - in how many files each LOD could have served as the parent of - self.lod.""" - - def __init__(self, lod): - self.lod = lod - self.tag_create_count = 0 - self.branch_create_count = 0 - self.branch_commit_count = 0 - self.branch_blockers = set() - self.trivial_import_count = 0 - self.pure_ntdb_count = 0 - self.possible_parents = { } - - def register_tag_creation(self): - """Register the creation of this lod as a tag.""" - - self.tag_create_count += 1 - - def register_branch_creation(self): - """Register the creation of this lod as a branch.""" - - self.branch_create_count += 1 - - def register_branch_commit(self): - """Register that there were commit(s) on this branch in one file.""" - - self.branch_commit_count += 1 - - def register_branch_blocker(self, blocker): - """Register BLOCKER as preventing this symbol from being deleted. - - BLOCKER is a tag or a branch that springs from a revision on this - symbol.""" - - self.branch_blockers.add(blocker) - - def register_trivial_import(self): - """Register that this branch is a trivial import branch in one file.""" - - self.trivial_import_count += 1 - - def register_pure_ntdb(self): - """Register that this branch is a pure import branch in one file.""" - - self.pure_ntdb_count += 1 - - def register_possible_parent(self, lod): - """Register that LOD was a possible parent for SELF.lod in a file.""" - - self.possible_parents[lod] = self.possible_parents.get(lod, 0) + 1 - - def register_branch_possible_parents(self, cvs_branch, cvs_file_items): - """Register any possible parents of this symbol from CVS_BRANCH.""" - - # This routine is a bottleneck. So we define some local variables - # to speed up access to frequently-needed variables. - register = self.register_possible_parent - parent_cvs_rev = cvs_file_items[cvs_branch.source_id] - - # The "obvious" parent of a branch is the branch holding the - # revision where the branch is rooted: - register(parent_cvs_rev.lod) - - # Any other branches that are rooted at the same revision and - # were committed earlier than the branch are also possible - # parents: - symbol = cvs_branch.symbol - for branch_id in parent_cvs_rev.branch_ids: - parent_symbol = cvs_file_items[branch_id].symbol - # A branch cannot be its own parent, nor can a branch's - # parent be a branch that was created after it. So we stop - # iterating when we reached the branch whose parents we are - # collecting: - if parent_symbol == symbol: - break - register(parent_symbol) - - def register_tag_possible_parents(self, cvs_tag, cvs_file_items): - """Register any possible parents of this symbol from CVS_TAG.""" - - # This routine is a bottleneck. So use local variables to speed - # up access to frequently-needed objects. - register = self.register_possible_parent - parent_cvs_rev = cvs_file_items[cvs_tag.source_id] - - # The "obvious" parent of a tag is the branch holding the - # revision where the branch is rooted: - register(parent_cvs_rev.lod) - - # Branches that are rooted at the same revision are also - # possible parents: - for branch_id in parent_cvs_rev.branch_ids: - parent_symbol = cvs_file_items[branch_id].symbol - register(parent_symbol) - - def is_ghost(self): - """Return True iff this lod never really existed.""" - - return ( - not isinstance(self.lod, Trunk) - and self.branch_commit_count == 0 - and not self.branch_blockers - and not self.possible_parents - ) - - def check_valid(self, symbol): - """Check whether SYMBOL is a valid conversion of SELF.lod. - - It is planned to convert SELF.lod as SYMBOL. Verify that SYMBOL - is a TypedSymbol and that the information that it contains is - consistent with that stored in SELF.lod. (This routine does not - do higher-level tests of whether the chosen conversion is actually - sensible.) If there are any problems, raise a - SymbolPlanException.""" - - if not isinstance(symbol, (Trunk, Branch, Tag, ExcludedSymbol)): - raise IndeterminateSymbolException(self, symbol) - - if symbol.id != self.lod.id: - raise SymbolPlanException(self, symbol, 'IDs must match') - - if symbol.project != self.lod.project: - raise SymbolPlanException(self, symbol, 'Projects must match') - - if isinstance(symbol, IncludedSymbol) and symbol.name != self.lod.name: - raise SymbolPlanException(self, symbol, 'Names must match') - - def check_preferred_parent_allowed(self, symbol): - """Check that SYMBOL's preferred_parent_id is an allowed parent. - - SYMBOL is the planned conversion of SELF.lod. Verify that its - preferred_parent_id is a possible parent of SELF.lod. If not, - raise a SymbolPlanException describing the problem.""" - - if isinstance(symbol, IncludedSymbol) \ - and symbol.preferred_parent_id is not None: - for pp in self.possible_parents.keys(): - if pp.id == symbol.preferred_parent_id: - return - else: - raise SymbolPlanException( - self, symbol, - 'The selected parent is not among the symbol\'s ' - 'possible parents.' - ) - - def __str__(self): - return ( - '\'%s\' is ' - 'a tag in %d files, ' - 'a branch in %d files, ' - 'a trivial import in %d files, ' - 'a pure import in %d files, ' - 'and has commits in %d files' - % (self.lod, self.tag_create_count, self.branch_create_count, - self.trivial_import_count, self.pure_ntdb_count, - self.branch_commit_count) - ) - - def __repr__(self): - retval = ['%s\n possible parents:\n' % (self,)] - parent_counts = self.possible_parents.items() - parent_counts.sort(lambda a,b: - cmp(a[1], b[1])) - for (symbol, count) in parent_counts: - if isinstance(symbol, Trunk): - retval.append(' trunk : %d\n' % count) - else: - retval.append(' \'%s\' : %d\n' % (symbol.name, count)) - if self.branch_blockers: - blockers = list(self.branch_blockers) - blockers.sort() - retval.append(' blockers:\n') - for blocker in blockers: - retval.append(' \'%s\'\n' % (blocker,)) - return ''.join(retval) - - -class SymbolStatisticsCollector: - """Collect statistics about lines of development. - - Record a summary of information about each line of development in - the RCS files for later storage into a database. The database is - created in CollectRevsPass and it is used in CollateSymbolsPass (via - the SymbolStatistics class). - - collect_data._SymbolDataCollector inserts information into instances - of this class by by calling its register_*() methods. - - Its main purpose is to assist in the decisions about which symbols - can be treated as branches and tags and which may be excluded. - - The data collected by this class can be written to the file - config.SYMBOL_STATISTICS.""" - - def __init__(self): - # A map { lod -> _Stats } for all lines of development: - self._stats = { } - - def __getitem__(self, lod): - """Return the _Stats record for line of development LOD. - - Create and register a new one if necessary.""" - - try: - return self._stats[lod] - except KeyError: - stats = _Stats(lod) - self._stats[lod] = stats - return stats - - def register(self, cvs_file_items): - """Register the statistics for each symbol in CVS_FILE_ITEMS.""" - - for lod_items in cvs_file_items.iter_lods(): - if lod_items.lod is not None: - branch_stats = self[lod_items.lod] - - branch_stats.register_branch_creation() - - if lod_items.cvs_revisions: - branch_stats.register_branch_commit() - - if lod_items.is_trivial_import(): - branch_stats.register_trivial_import() - - if lod_items.is_pure_ntdb(): - branch_stats.register_pure_ntdb() - - for cvs_symbol in lod_items.iter_blockers(): - branch_stats.register_branch_blocker(cvs_symbol.symbol) - - if lod_items.cvs_branch is not None: - branch_stats.register_branch_possible_parents( - lod_items.cvs_branch, cvs_file_items - ) - - for cvs_tag in lod_items.cvs_tags: - tag_stats = self[cvs_tag.symbol] - - tag_stats.register_tag_creation() - - tag_stats.register_tag_possible_parents(cvs_tag, cvs_file_items) - - def purge_ghost_symbols(self): - """Purge any symbols that don't have any activity. - - Such ghost symbols can arise if a symbol was defined in an RCS - file but pointed at a non-existent revision.""" - - for stats in self._stats.values(): - if stats.is_ghost(): - Log().warn('Deleting ghost symbol: %s' % (stats.lod,)) - del self._stats[stats.lod] - - def close(self): - """Store the stats database to the SYMBOL_STATISTICS file.""" - - f = open(artifact_manager.get_temp_file(config.SYMBOL_STATISTICS), 'wb') - cPickle.dump(self._stats.values(), f, -1) - f.close() - self._stats = None - - -class SymbolStatistics: - """Read and handle line of development statistics. - - The statistics are read from a database created by - SymbolStatisticsCollector. This class has methods to process the - statistics information and help with decisions about: - - 1. What tags and branches should be processed/excluded - - 2. What tags should be forced to be branches and vice versa (this - class maintains some statistics to help the user decide) - - 3. Are there inconsistencies? - - - A symbol that is sometimes a branch and sometimes a tag - - - A forced branch with commit(s) on it - - - A non-excluded branch depends on an excluded branch - - The data in this class is read from a pickle file.""" - - def __init__(self, filename): - """Read the stats database from FILENAME.""" - - # A map { LineOfDevelopment -> _Stats } for all lines of - # development: - self._stats = { } - - # A map { LineOfDevelopment.id -> _Stats } for all lines of - # development: - self._stats_by_id = { } - - stats_list = cPickle.load(open(filename, 'rb')) - - for stats in stats_list: - self._stats[stats.lod] = stats - self._stats_by_id[stats.lod.id] = stats - - def __len__(self): - return len(self._stats) - - def __getitem__(self, lod_id): - return self._stats_by_id[lod_id] - - def get_stats(self, lod): - """Return the _Stats object for LineOfDevelopment instance LOD. - - Raise KeyError if no such lod exists.""" - - return self._stats[lod] - - def __iter__(self): - return self._stats.itervalues() - - def _check_blocked_excludes(self, symbol_map): - """Check for any excluded LODs that are blocked by non-excluded symbols. - - If any are found, describe the problem to Log().error() and raise - a FatalException.""" - - # A list of (lod,[blocker,...]) tuples for excludes that are - # blocked by the specified non-excluded blockers: - problems = [] - - for lod in symbol_map.itervalues(): - if isinstance(lod, ExcludedSymbol): - # Symbol is excluded; make sure that its blockers are also - # excluded: - lod_blockers = [] - for blocker in self.get_stats(lod).branch_blockers: - if isinstance(symbol_map.get(blocker, None), IncludedSymbol): - lod_blockers.append(blocker) - if lod_blockers: - problems.append((lod, lod_blockers)) - - if problems: - s = [] - for (lod, lod_blockers) in problems: - s.append( - '%s: %s cannot be excluded because the following symbols ' - 'depend on it:\n' - % (error_prefix, lod,) - ) - for blocker in lod_blockers: - s.append(' %s\n' % (blocker,)) - s.append('\n') - Log().error(''.join(s)) - - raise FatalException() - - def _check_invalid_tags(self, symbol_map): - """Check for commits on any symbols that are to be converted as tags. - - SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)} - indicating how each AbstractSymbol is to be converted. If there - is a commit on a symbol, then it cannot be converted as a tag. If - any tags with commits are found, output error messages describing - the problems then raise a FatalException.""" - - Log().quiet("Checking for forced tags with commits...") - - invalid_tags = [ ] - for symbol in symbol_map.itervalues(): - if isinstance(symbol, Tag): - stats = self.get_stats(symbol) - if stats.branch_commit_count > 0: - invalid_tags.append(symbol) - - if not invalid_tags: - # No problems found: - return - - s = [] - s.append( - '%s: The following branches cannot be forced to be tags ' - 'because they have commits:\n' - % (error_prefix,) - ) - for tag in invalid_tags: - s.append(' %s\n' % (tag.name)) - s.append('\n') - Log().error(''.join(s)) - - raise FatalException() - - def check_consistency(self, symbol_map): - """Check the plan for how to convert symbols for consistency. - - SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)} - indicating how each AbstractSymbol is to be converted. If any - problems are detected, describe the problem to Log().error() and - raise a FatalException.""" - - # We want to do all of the consistency checks even if one of them - # fails, so that the user gets as much feedback as possible. Set - # this variable to True if any errors are found. - error_found = False - - # Check that the planned preferred parents are OK for all - # IncludedSymbols: - for lod in symbol_map.itervalues(): - if isinstance(lod, IncludedSymbol): - stats = self.get_stats(lod) - try: - stats.check_preferred_parent_allowed(lod) - except SymbolPlanException, e: - Log().error('%s\n' % (e,)) - error_found = True - - try: - self._check_blocked_excludes(symbol_map) - except FatalException: - error_found = True - - try: - self._check_invalid_tags(symbol_map) - except FatalException: - error_found = True - - if error_found: - raise FatalException( - 'Please fix the above errors and restart CollateSymbolsPass' - ) - - def exclude_symbol(self, symbol): - """SYMBOL has been excluded; remove it from our statistics.""" - - del self._stats[symbol] - del self._stats_by_id[symbol.id] - - # Remove references to this symbol from other statistics objects: - for stats in self._stats.itervalues(): - stats.branch_blockers.discard(symbol) - if symbol in stats.possible_parents: - del stats.possible_parents[symbol] - - diff --git a/cvs2svn_lib/symbol_strategy.py b/cvs2svn_lib/symbol_strategy.py deleted file mode 100644 index 9d562a8..0000000 --- a/cvs2svn_lib/symbol_strategy.py +++ /dev/null @@ -1,685 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2000-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""SymbolStrategy classes determine how to convert symbols.""" - -import re - -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import path_join -from cvs2svn_lib.common import normalize_svn_path -from cvs2svn_lib.log import Log -from cvs2svn_lib.symbol import Trunk -from cvs2svn_lib.symbol import TypedSymbol -from cvs2svn_lib.symbol import Branch -from cvs2svn_lib.symbol import Tag -from cvs2svn_lib.symbol import ExcludedSymbol -from cvs2svn_lib.symbol_statistics import SymbolPlanError - - -class StrategyRule: - """A single rule that might determine how to convert a symbol.""" - - def start(self, symbol_statistics): - """This method is called once before get_symbol() is ever called. - - The StrategyRule can override this method to do whatever it wants - to prepare itself for work. SYMBOL_STATISTICS is an instance of - SymbolStatistics containing the statistics for all symbols in all - projects.""" - - pass - - def get_symbol(self, symbol, stats): - """Return an object describing what to do with the symbol in STATS. - - SYMBOL holds a Trunk or Symbol object as it has been determined so - far. Hopefully one of these method calls will turn any naked - Symbol instances into TypedSymbols. - - If this rule applies to the SYMBOL (whose statistics are collected - in STATS), then return a new or modified AbstractSymbol object. - If this rule doesn't apply, return SYMBOL unchanged.""" - - raise NotImplementedError() - - def finish(self): - """This method is called once after get_symbol() is done being called. - - The StrategyRule can override this method do whatever it wants to - release resources, etc.""" - - pass - - -class _RegexpStrategyRule(StrategyRule): - """A Strategy rule that bases its decisions on regexp matches. - - If self.regexp matches a symbol name, return self.action(symbol); - otherwise, return the symbol unchanged.""" - - def __init__(self, pattern, action): - """Initialize a _RegexpStrategyRule. - - PATTERN is a string that will be treated as a regexp pattern. - PATTERN must match a full symbol name for the rule to apply (i.e., - it is anchored at the beginning and end of the symbol name). - - ACTION is the class representing how the symbol should be - converted. It should be one of the classes Branch, Tag, or - ExcludedSymbol. - - If PATTERN matches a symbol name, then get_symbol() returns - ACTION(name, id); otherwise it returns SYMBOL unchanged.""" - - try: - self.regexp = re.compile('^' + pattern + '$') - except re.error: - raise FatalError("%r is not a valid regexp." % (pattern,)) - - self.action = action - - def log(self, symbol): - raise NotImplementedError() - - def get_symbol(self, symbol, stats): - if isinstance(symbol, (Trunk, TypedSymbol)): - return symbol - elif self.regexp.match(symbol.name): - self.log(symbol) - return self.action(symbol) - else: - return symbol - - -class ForceBranchRegexpStrategyRule(_RegexpStrategyRule): - """Force symbols matching pattern to be branches.""" - - def __init__(self, pattern): - _RegexpStrategyRule.__init__(self, pattern, Branch) - - def log(self, symbol): - Log().verbose( - 'Converting symbol %s as a branch because it matches regexp "%s".' - % (symbol, self.regexp.pattern,) - ) - - -class ForceTagRegexpStrategyRule(_RegexpStrategyRule): - """Force symbols matching pattern to be tags.""" - - def __init__(self, pattern): - _RegexpStrategyRule.__init__(self, pattern, Tag) - - def log(self, symbol): - Log().verbose( - 'Converting symbol %s as a tag because it matches regexp "%s".' - % (symbol, self.regexp.pattern,) - ) - - -class ExcludeRegexpStrategyRule(_RegexpStrategyRule): - """Exclude symbols matching pattern.""" - - def __init__(self, pattern): - _RegexpStrategyRule.__init__(self, pattern, ExcludedSymbol) - - def log(self, symbol): - Log().verbose( - 'Excluding symbol %s because it matches regexp "%s".' - % (symbol, self.regexp.pattern,) - ) - - -class ExcludeTrivialImportBranchRule(StrategyRule): - """If a symbol is a trivial import branch, exclude it. - - A trivial import branch is defined to be a branch that only had a - single import on it (no other kinds of commits) in every file in - which it appeared. In most cases these branches are worthless.""" - - def get_symbol(self, symbol, stats): - if isinstance(symbol, (Trunk, TypedSymbol)): - return symbol - if stats.tag_create_count == 0 \ - and stats.branch_create_count == stats.trivial_import_count: - Log().verbose( - 'Excluding branch %s because it is a trivial import branch.' - % (symbol,) - ) - return ExcludedSymbol(symbol) - else: - return symbol - - -class ExcludeVendorBranchRule(StrategyRule): - """If a symbol is a pure vendor branch, exclude it. - - A pure vendor branch is defined to be a branch that only had imports - on it (no other kinds of commits) in every file in which it - appeared.""" - - def get_symbol(self, symbol, stats): - if isinstance(symbol, (Trunk, TypedSymbol)): - return symbol - if stats.tag_create_count == 0 \ - and stats.branch_create_count == stats.pure_ntdb_count: - Log().verbose( - 'Excluding branch %s because it is a pure vendor branch.' - % (symbol,) - ) - return ExcludedSymbol(symbol) - else: - return symbol - - -class UnambiguousUsageRule(StrategyRule): - """If a symbol is used unambiguously as a tag/branch, convert it as such.""" - - def get_symbol(self, symbol, stats): - if isinstance(symbol, (Trunk, TypedSymbol)): - return symbol - is_tag = stats.tag_create_count > 0 - is_branch = stats.branch_create_count > 0 or stats.branch_commit_count > 0 - if is_tag and is_branch: - # Can't decide - return symbol - elif is_branch: - Log().verbose( - 'Converting symbol %s as a branch because it is always used ' - 'as a branch.' - % (symbol,) - ) - return Branch(symbol) - elif is_tag: - Log().verbose( - 'Converting symbol %s as a tag because it is always used ' - 'as a tag.' - % (symbol,) - ) - return Tag(symbol) - else: - # The symbol didn't appear at all: - return symbol - - -class BranchIfCommitsRule(StrategyRule): - """If there was ever a commit on the symbol, convert it as a branch.""" - - def get_symbol(self, symbol, stats): - if isinstance(symbol, (Trunk, TypedSymbol)): - return symbol - elif stats.branch_commit_count > 0: - Log().verbose( - 'Converting symbol %s as a branch because there are commits on it.' - % (symbol,) - ) - return Branch(symbol) - else: - return symbol - - -class HeuristicStrategyRule(StrategyRule): - """Convert symbol based on how often it was used as a branch/tag. - - Whichever happened more often determines how the symbol is - converted.""" - - def get_symbol(self, symbol, stats): - if isinstance(symbol, (Trunk, TypedSymbol)): - return symbol - elif stats.tag_create_count >= stats.branch_create_count: - Log().verbose( - 'Converting symbol %s as a tag because it is more often used ' - 'as a tag.' - % (symbol,) - ) - return Tag(symbol) - else: - Log().verbose( - 'Converting symbol %s as a branch because it is more often used ' - 'as a branch.' - % (symbol,) - ) - return Branch(symbol) - - -class AllBranchRule(StrategyRule): - """Convert all symbols as branches. - - Usually this rule will appear after a list of more careful rules - (including a general rule like UnambiguousUsageRule) and will - therefore only apply to the symbols not handled earlier.""" - - def get_symbol(self, symbol, stats): - if isinstance(symbol, (Trunk, TypedSymbol)): - return symbol - else: - Log().verbose( - 'Converting symbol %s as a branch because no other rules applied.' - % (symbol,) - ) - return Branch(symbol) - - -class AllTagRule(StrategyRule): - """Convert all symbols as tags. - - We don't worry about conflicts here; they will be caught later by - SymbolStatistics.check_consistency(). - - Usually this rule will appear after a list of more careful rules - (including a general rule like UnambiguousUsageRule) and will - therefore only apply to the symbols not handled earlier.""" - - def get_symbol(self, symbol, stats): - if isinstance(symbol, (Trunk, TypedSymbol)): - return symbol - else: - Log().verbose( - 'Converting symbol %s as a tag because no other rules applied.' - % (symbol,) - ) - return Tag(symbol) - - -class TrunkPathRule(StrategyRule): - """Set the base path for Trunk.""" - - def __init__(self, trunk_path): - self.trunk_path = trunk_path - - def get_symbol(self, symbol, stats): - if isinstance(symbol, Trunk) and symbol.base_path is None: - symbol.base_path = self.trunk_path - - return symbol - - -class SymbolPathRule(StrategyRule): - """Set the base paths for symbol LODs.""" - - def __init__(self, symbol_type, base_path): - self.symbol_type = symbol_type - self.base_path = base_path - - def get_symbol(self, symbol, stats): - if isinstance(symbol, self.symbol_type) and symbol.base_path is None: - symbol.base_path = path_join(self.base_path, symbol.name) - - return symbol - - -class BranchesPathRule(SymbolPathRule): - """Set the base paths for Branch LODs.""" - - def __init__(self, branch_path): - SymbolPathRule.__init__(self, Branch, branch_path) - - -class TagsPathRule(SymbolPathRule): - """Set the base paths for Tag LODs.""" - - def __init__(self, tag_path): - SymbolPathRule.__init__(self, Tag, tag_path) - - -class HeuristicPreferredParentRule(StrategyRule): - """Use a heuristic rule to pick preferred parents. - - Pick the parent that should be preferred for any TypedSymbols. As - parent, use the symbol that appeared most often as a possible parent - of the symbol in question. If multiple symbols are tied, choose the - one that comes first according to the Symbol class's natural sort - order.""" - - def _get_preferred_parent(self, stats): - """Return the LODs that are most often possible parents in STATS. - - Return the set of LinesOfDevelopment that appeared most often as - possible parents. The return value might contain multiple symbols - if multiple LinesOfDevelopment appeared the same number of times.""" - - best_count = -1 - best_symbol = None - for (symbol, count) in stats.possible_parents.items(): - if count > best_count or (count == best_count and symbol < best_symbol): - best_count = count - best_symbol = symbol - - if best_symbol is None: - return None - else: - return best_symbol - - def get_symbol(self, symbol, stats): - if isinstance(symbol, TypedSymbol) and symbol.preferred_parent_id is None: - preferred_parent = self._get_preferred_parent(stats) - if preferred_parent is None: - Log().verbose('%s has no preferred parent' % (symbol,)) - else: - symbol.preferred_parent_id = preferred_parent.id - Log().verbose( - 'The preferred parent of %s is %s' % (symbol, preferred_parent,) - ) - - return symbol - - -class ManualTrunkRule(StrategyRule): - """Change the SVN path of Trunk LODs. - - Members: - - project_id -- (int or None) The id of the project whose trunk - should be affected by this rule. If project_id is None, then - the rule is not project-specific. - - svn_path -- (str) The SVN path that should be used as the base - directory for this trunk. This member must not be None, - though it may be the empty string for a single-project, - trunk-only conversion. - - """ - - def __init__(self, project_id, svn_path): - self.project_id = project_id - self.svn_path = normalize_svn_path(svn_path, allow_empty=True) - - def get_symbol(self, symbol, stats): - if (self.project_id is not None - and self.project_id != stats.lod.project.id): - return symbol - - if isinstance(symbol, Trunk): - symbol.base_path = self.svn_path - - return symbol - - -def convert_as_branch(symbol): - Log().verbose( - 'Converting symbol %s as a branch because of manual setting.' - % (symbol,) - ) - return Branch(symbol) - - -def convert_as_tag(symbol): - Log().verbose( - 'Converting symbol %s as a tag because of manual setting.' - % (symbol,) - ) - return Tag(symbol) - - -def exclude(symbol): - Log().verbose( - 'Excluding symbol %s because of manual setting.' - % (symbol,) - ) - return ExcludedSymbol(symbol) - - -class ManualSymbolRule(StrategyRule): - """Change how particular symbols are converted. - - Members: - - project_id -- (int or None) The id of the project whose trunk - should be affected by this rule. If project_id is None, then - the rule is not project-specific. - - symbol_name -- (str) The name of the symbol that should be - affected by this rule. - - conversion -- (callable or None) A callable that converts the - symbol to its preferred output type. This should normally be - one of (convert_as_branch, convert_as_tag, exclude). If this - member is None, then this rule does not affect the symbol's - output type. - - svn_path -- (str) The SVN path that should be used as the base - directory for this trunk. This member must not be None, - though it may be the empty string for a single-project, - trunk-only conversion. - - parent_lod_name -- (str or None) The name of the line of - development that should be preferred as the parent of this - symbol. (The preferred parent is the line of development from - which the symbol should sprout.) If this member is set to the - string '.trunk.', then the symbol will be set to sprout - directly from trunk. If this member is set to None, then this - rule won't affect the symbol's parent. - - """ - - def __init__( - self, project_id, symbol_name, conversion, svn_path, parent_lod_name - ): - self.project_id = project_id - self.symbol_name = symbol_name - self.conversion = conversion - if svn_path is None: - self.svn_path = None - else: - self.svn_path = normalize_svn_path(svn_path, allow_empty=True) - self.parent_lod_name = parent_lod_name - - def _get_parent_by_id(self, parent_lod_name, stats): - """Return the LOD object for the parent with name PARENT_LOD_NAME. - - STATS is the _Stats object describing a symbol whose parent needs - to be determined from its name. If none of its possible parents - has name PARENT_LOD_NAME, raise a SymbolPlanError.""" - - for pp in stats.possible_parents.keys(): - if isinstance(pp, Trunk): - pass - elif pp.name == parent_lod_name: - return pp - else: - parent_counts = stats.possible_parents.items() - parent_counts.sort(lambda a,b: - cmp(a[1], b[1])) - lines = [ - '%s is not a valid parent for %s;' - % (parent_lod_name, stats.lod,), - ' possible parents (with counts):' - ] - for (symbol, count) in parent_counts: - if isinstance(symbol, Trunk): - lines.append(' .trunk. : %d' % count) - else: - lines.append(' %s : %d' % (symbol.name, count)) - raise SymbolPlanError('\n'.join(lines)) - - def get_symbol(self, symbol, stats): - if (self.project_id is not None - and self.project_id != stats.lod.project.id): - return symbol - - elif isinstance(symbol, Trunk): - return symbol - - elif self.symbol_name == stats.lod.name: - if self.conversion is not None: - symbol = self.conversion(symbol) - - if self.parent_lod_name is None: - pass - elif self.parent_lod_name == '.trunk.': - symbol.preferred_parent_id = stats.lod.project.trunk_id - else: - symbol.preferred_parent_id = self._get_parent_by_id( - self.parent_lod_name, stats - ).id - - if self.svn_path is not None: - symbol.base_path = self.svn_path - - return symbol - - -class SymbolHintsFileRule(StrategyRule): - """Use manual symbol configurations read from a file. - - The input file is line-oriented with the following format: - - [ []] - - Where the fields are separated by whitespace and - - project-id -- the numerical id of the Project to which the - symbol belongs (numbered starting with 0). This field can - be '.' if the rule is not project-specific. - - symbol-name -- the name of the symbol being specified, or - '.trunk.' if the rule should apply to trunk. - - conversion -- how the symbol should be treated in the - conversion. This is one of the following values: 'branch', - 'tag', or 'exclude'. This field can be '.' if the rule - shouldn't affect how the symbol is treated in the - conversion. - - svn-path -- the SVN path that should serve as the root path of - this LOD. The path should be expressed as a path relative - to the SVN root directory, with or without a leading '/'. - This field can be omitted or '.' if the rule shouldn't - affect the LOD's SVN path. - - parent-lod-name -- the name of the LOD that should serve as this - symbol's parent. This field can be omitted or '.' if the - rule shouldn't affect the symbol's parent, or it can be - '.trunk.' to indicate that the symbol should sprout from the - project's trunk.""" - - comment_re = re.compile(r'^(\#|$)') - - conversion_map = { - 'branch' : convert_as_branch, - 'tag' : convert_as_tag, - 'exclude' : exclude, - '.' : None, - } - - def __init__(self, filename): - self.filename = filename - - def start(self, symbol_statistics): - self._rules = [] - - f = open(self.filename, 'r') - for l in f: - l = l.rstrip() - s = l.lstrip() - if self.comment_re.match(s): - continue - fields = s.split() - - if len(fields) < 3: - raise FatalError( - 'The following line in "%s" cannot be parsed:\n "%s"' - % (self.filename, l,) - ) - - project_id = fields.pop(0) - symbol_name = fields.pop(0) - conversion = fields.pop(0) - - if fields: - svn_path = fields.pop(0) - if svn_path == '.': - svn_path = None - elif svn_path[0] == '/': - svn_path = svn_path[1:] - else: - svn_path = None - - if fields: - parent_lod_name = fields.pop(0) - else: - parent_lod_name = '.' - - if fields: - raise FatalError( - 'The following line in "%s" cannot be parsed:\n "%s"' - % (self.filename, l,) - ) - - if project_id == '.': - project_id = None - else: - try: - project_id = int(project_id) - except ValueError: - raise FatalError( - 'Illegal project_id in the following line:\n "%s"' % (l,) - ) - - if symbol_name == '.trunk.': - if conversion not in ['.', 'trunk']: - raise FatalError('Trunk cannot be converted as a different type') - - if parent_lod_name != '.': - raise FatalError('Trunk\'s parent cannot be set') - - if svn_path is None: - # This rule doesn't do anything: - pass - else: - self._rules.append(ManualTrunkRule(project_id, svn_path)) - - else: - try: - conversion = self.conversion_map[conversion] - except KeyError: - raise FatalError( - 'Illegal conversion in the following line:\n "%s"' % (l,) - ) - - if parent_lod_name == '.': - parent_lod_name = None - - if conversion is None \ - and svn_path is None \ - and parent_lod_name is None: - # There is nothing to be done: - pass - else: - self._rules.append( - ManualSymbolRule( - project_id, symbol_name, - conversion, svn_path, parent_lod_name - ) - ) - - for rule in self._rules: - rule.start(symbol_statistics) - - def get_symbol(self, symbol, stats): - for rule in self._rules: - symbol = rule.get_symbol(symbol, stats) - - return symbol - - def finish(self): - for rule in self._rules: - rule.finish() - - del self._rules - - diff --git a/cvs2svn_lib/symbol_transform.py b/cvs2svn_lib/symbol_transform.py deleted file mode 100644 index a4995b8..0000000 --- a/cvs2svn_lib/symbol_transform.py +++ /dev/null @@ -1,236 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2006-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains classes to transform symbol names.""" - - -import os -import re - -from cvs2svn_lib.log import Log -from cvs2svn_lib.common import FatalError -from cvs2svn_lib.common import IllegalSVNPathError -from cvs2svn_lib.common import normalize_svn_path - - -class SymbolTransform: - """Transform symbol names arbitrarily.""" - - def transform(self, cvs_file, symbol_name, revision): - """Possibly transform SYMBOL_NAME, which was found in CVS_FILE. - - Return the transformed symbol name. If this SymbolTransform - doesn't apply, return the original SYMBOL_NAME. If this symbol - should be ignored entirely, return None. (Please note that - ignoring a branch via this mechanism only causes the branch *name* - to be ignored; the branch contents will still be converted. - Usually branches should be excluded using --exclude.) - - REVISION contains the CVS revision number to which the symbol was - attached in the file as a string (with zeros removed). - - This method is free to use the information in CVS_FILE (including - CVS_FILE.project) to decide whether and/or how to transform - SYMBOL_NAME.""" - - raise NotImplementedError() - - -class ReplaceSubstringsSymbolTransform(SymbolTransform): - """Replace specific substrings in symbol names. - - If the substring occurs multiple times, replace all copies.""" - - def __init__(self, old, new): - self.old = old - self.new = new - - def transform(self, cvs_file, symbol_name, revision): - return symbol_name.replace(self.old, self.new) - - -class NormalizePathsSymbolTransform(SymbolTransform): - def transform(self, cvs_file, symbol_name, revision): - try: - return normalize_svn_path(symbol_name) - except IllegalSVNPathError, e: - raise FatalError('Problem with %s: %s' % (symbol_name, e,)) - - -class CompoundSymbolTransform(SymbolTransform): - """A SymbolTransform that applies other SymbolTransforms in series. - - Each of the contained SymbolTransforms is applied, one after the - other. If any of them returns None, then None is returned (the - following SymbolTransforms are ignored).""" - - def __init__(self, symbol_transforms): - """Ininitialize a CompoundSymbolTransform. - - SYMBOL_TRANSFORMS is an iterable of SymbolTransform instances.""" - - self.symbol_transforms = list(symbol_transforms) - - def transform(self, cvs_file, symbol_name, revision): - for symbol_transform in self.symbol_transforms: - symbol_name = symbol_transform.transform( - cvs_file, symbol_name, revision - ) - if symbol_name is None: - # Don't continue with other symbol transforms: - break - - return symbol_name - - -class RegexpSymbolTransform(SymbolTransform): - """Transform symbols by using a regexp textual substitution.""" - - def __init__(self, pattern, replacement): - """Create a SymbolTransform that transforms symbols matching PATTERN. - - PATTERN is a regular expression that should match the whole symbol - name. REPLACEMENT is the replacement text, which may include - patterns like r'\1' or r'\g<1>' or r'\g' (where 'name' is a - reference to a named substring in the pattern of the form - r'(?P...)').""" - - self.pattern = re.compile('^' + pattern + '$') - self.replacement = replacement - - def transform(self, cvs_file, symbol_name, revision): - return self.pattern.sub(self.replacement, symbol_name) - - -class SymbolMapper(SymbolTransform): - """A SymbolTransform that transforms specific symbol definitions. - - The user has to specify the exact CVS filename, symbol name, and - revision number to be transformed, and the new name (or None if the - symbol should be ignored). The mappings can be set via a - constructor argument or by calling __setitem__().""" - - def __init__(self, items=[]): - """Initialize the mapper. - - ITEMS is a list of tuples (cvs_filename, symbol_name, revision, - new_name) which will be set as mappings.""" - - # A map {(cvs_filename, symbol_name, revision) : new_name}: - self._map = {} - - for (cvs_filename, symbol_name, revision, new_name) in items: - self[cvs_filename, symbol_name, revision] = new_name - - def __setitem__(self, (cvs_filename, symbol_name, revision), new_name): - """Set a mapping for a particular file, symbol, and revision.""" - - cvs_filename = os.path.normcase(os.path.normpath(cvs_filename)) - key = (cvs_filename, symbol_name, revision) - if key in self._map: - Log().warn( - 'Overwriting symbol transform for\n' - ' filename=%r symbol=%s revision=%s' - % (cvs_filename, symbol_name, revision,) - ) - self._map[key] = new_name - - def transform(self, cvs_file, symbol_name, revision): - cvs_filename = os.path.normcase(os.path.normpath(cvs_file.filename)) - return self._map.get( - (cvs_filename, symbol_name, revision), symbol_name - ) - - -class SubtreeSymbolMapper(SymbolTransform): - """A SymbolTransform that transforms symbols within a whole repo subtree. - - The user has to specify a CVS repository path (a filename or - directory) and the original symbol name. All symbols under that - path will be renamed to the specified new name (which can be None if - the symbol should be ignored). The mappings can be set via a - constructor argument or by calling __setitem__(). Only the most - specific rule is applied.""" - - def __init__(self, items=[]): - """Initialize the mapper. - - ITEMS is a list of tuples (cvs_path, symbol_name, new_name) - which will be set as mappings. cvs_path is a string naming a - directory within the CVS repository.""" - - # A map {symbol_name : {cvs_path : new_name}}: - self._map = {} - - for (cvs_path, symbol_name, new_name) in items: - self[cvs_path, symbol_name] = new_name - - def __setitem__(self, (cvs_path, symbol_name), new_name): - """Set a mapping for a particular file and symbol.""" - - try: - symbol_map = self._map[symbol_name] - except KeyError: - symbol_map = {} - self._map[symbol_name] = symbol_map - - cvs_path = os.path.normcase(os.path.normpath(cvs_path)) - if cvs_path in symbol_map: - Log().warn( - 'Overwriting symbol transform for\n' - ' directory=%r symbol=%s' - % (cvs_path, symbol_name,) - ) - symbol_map[cvs_path] = new_name - - def transform(self, cvs_file, symbol_name, revision): - try: - symbol_map = self._map[symbol_name] - except KeyError: - # No rules for that symbol name - return symbol_name - - cvs_path = os.path.normcase(os.path.normpath(cvs_file.filename)) - while True: - try: - return symbol_map[cvs_path] - except KeyError: - new_cvs_path = os.path.dirname(cvs_path) - if new_cvs_path == cvs_path: - # No rules found for that path; return symbol name unaltered. - return symbol_name - else: - cvs_path = new_cvs_path - - -class IgnoreSymbolTransform(SymbolTransform): - """Ignore symbols matching a specified regular expression.""" - - def __init__(self, pattern): - """Create an SymbolTransform that ignores symbols matching PATTERN. - - PATTERN is a regular expression that should match the whole symbol - name.""" - - self.pattern = re.compile('^' + pattern + '$') - - def transform(self, cvs_file, symbol_name, revision): - if self.pattern.match(symbol_name): - return None - else: - return symbol_name - - diff --git a/cvs2svn_lib/time_range.py b/cvs2svn_lib/time_range.py deleted file mode 100644 index f7dc234..0000000 --- a/cvs2svn_lib/time_range.py +++ /dev/null @@ -1,44 +0,0 @@ -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2006-2008 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""This module contains a class to manage time ranges.""" - - -class TimeRange(object): - __slots__ = ('t_min', 't_max') - - def __init__(self): - # Start out with a t_min higher than any incoming time T, and a - # t_max lower than any incoming T. This way the first T will push - # t_min down to T, and t_max up to T, naturally (without any - # special-casing), and successive times will then ratchet them - # outward as appropriate. - self.t_min = 1L<<32 - self.t_max = 0 - - def add(self, timestamp): - """Expand the range to encompass TIMESTAMP.""" - - if timestamp < self.t_min: - self.t_min = timestamp - if timestamp > self.t_max: - self.t_max = timestamp - - def __cmp__(self, other): - # Sorted by t_max, and break ties using t_min. - return cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min) - - diff --git a/cvs2svn_lib/version.py b/cvs2svn_lib/version.py deleted file mode 100644 index 7900964..0000000 --- a/cvs2svn_lib/version.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python2 -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2007-2009 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -# The version of cvs2svn: -VERSION = '2.3.0' - - -# If this file is run as a script, print the cvs2svn version number to -# stdout: -if __name__ == '__main__': - print VERSION - - diff --git a/cvs2svn_rcsparse/__init__.py b/cvs2svn_rcsparse/__init__.py deleted file mode 100644 index 829c117..0000000 --- a/cvs2svn_rcsparse/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*-python-*- -# -# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved. -# -# By using this file, you agree to the terms and conditions set forth in -# the LICENSE.html file which can be found at the top level of the ViewVC -# distribution or at http://viewvc.org/license-1.html. -# -# For more information, visit http://viewvc.org/ -# -# ----------------------------------------------------------------------- - -"""This package provides parsing tools for RCS files.""" - -from common import * - -try: - from tparse import parse -except ImportError: - try: - from texttools import Parser - except ImportError: - from default import Parser - - def parse(file, sink): - return Parser().parse(file, sink) diff --git a/cvs2svn_rcsparse/common.py b/cvs2svn_rcsparse/common.py deleted file mode 100644 index 3eed600..0000000 --- a/cvs2svn_rcsparse/common.py +++ /dev/null @@ -1,324 +0,0 @@ -# -*-python-*- -# -# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved. -# -# By using this file, you agree to the terms and conditions set forth in -# the LICENSE.html file which can be found at the top level of the ViewVC -# distribution or at http://viewvc.org/license-1.html. -# -# For more information, visit http://viewvc.org/ -# -# ----------------------------------------------------------------------- - -"""common.py: common classes and functions for the RCS parsing tools.""" - -import calendar -import string - -class Sink: - def set_head_revision(self, revision): - pass - - def set_principal_branch(self, branch_name): - pass - - def set_access(self, accessors): - pass - - def define_tag(self, name, revision): - pass - - def set_locker(self, revision, locker): - pass - - def set_locking(self, mode): - """Used to signal locking mode. - - Called with mode argument 'strict' if strict locking - Not called when no locking used.""" - - pass - - def set_comment(self, comment): - pass - - def set_expansion(self, mode): - pass - - def admin_completed(self): - pass - - def define_revision(self, revision, timestamp, author, state, - branches, next): - pass - - def tree_completed(self): - pass - - def set_description(self, description): - pass - - def set_revision_info(self, revision, log, text): - pass - - def parse_completed(self): - pass - - -# -------------------------------------------------------------------------- -# -# EXCEPTIONS USED BY RCSPARSE -# - -class RCSParseError(Exception): - pass - - -class RCSIllegalCharacter(RCSParseError): - pass - - -class RCSExpected(RCSParseError): - def __init__(self, got, wanted): - RCSParseError.__init__( - self, - 'Unexpected parsing error in RCS file.\n' - 'Expected token: %s, but saw: %s' - % (wanted, got) - ) - - -class RCSStopParser(Exception): - pass - - -# -------------------------------------------------------------------------- -# -# STANDARD TOKEN STREAM-BASED PARSER -# - -class _Parser: - stream_class = None # subclasses need to define this - - def _read_until_semicolon(self): - """Read all tokens up to and including the next semicolon token. - - Return the tokens (not including the semicolon) as a list.""" - - tokens = [] - - while 1: - token = self.ts.get() - if token == ';': - break - tokens.append(token) - - return tokens - - def _parse_admin_head(self, token): - rev = self.ts.get() - if rev == ';': - # The head revision is not specified. Just drop the semicolon - # on the floor. - pass - else: - self.sink.set_head_revision(rev) - self.ts.match(';') - - def _parse_admin_branch(self, token): - branch = self.ts.get() - if branch != ';': - self.sink.set_principal_branch(branch) - self.ts.match(';') - - def _parse_admin_access(self, token): - accessors = self._read_until_semicolon() - if accessors: - self.sink.set_access(accessors) - - def _parse_admin_symbols(self, token): - while 1: - tag_name = self.ts.get() - if tag_name == ';': - break - self.ts.match(':') - tag_rev = self.ts.get() - self.sink.define_tag(tag_name, tag_rev) - - def _parse_admin_locks(self, token): - while 1: - locker = self.ts.get() - if locker == ';': - break - self.ts.match(':') - rev = self.ts.get() - self.sink.set_locker(rev, locker) - - def _parse_admin_strict(self, token): - self.sink.set_locking("strict") - self.ts.match(';') - - def _parse_admin_comment(self, token): - self.sink.set_comment(self.ts.get()) - self.ts.match(';') - - def _parse_admin_expand(self, token): - expand_mode = self.ts.get() - self.sink.set_expansion(expand_mode) - self.ts.match(';') - - admin_token_map = { - 'head' : _parse_admin_head, - 'branch' : _parse_admin_branch, - 'access' : _parse_admin_access, - 'symbols' : _parse_admin_symbols, - 'locks' : _parse_admin_locks, - 'strict' : _parse_admin_strict, - 'comment' : _parse_admin_comment, - 'expand' : _parse_admin_expand, - 'desc' : None, - } - - def parse_rcs_admin(self): - while 1: - # Read initial token at beginning of line - token = self.ts.get() - - try: - f = self.admin_token_map[token] - except KeyError: - # We're done once we reach the description of the RCS tree - if token[0] in string.digits: - self.ts.unget(token) - return - else: - # Chew up "newphrase" - # warn("Unexpected RCS token: $token\n") - pass - else: - if f is None: - self.ts.unget(token) - return - else: - f(self, token) - - def _parse_rcs_tree_entry(self, revision): - # Parse date - self.ts.match('date') - date = self.ts.get() - self.ts.match(';') - - # Convert date into timestamp - date_fields = string.split(date, '.') - # According to rcsfile(5): the year "contains just the last two - # digits of the year for years from 1900 through 1999, and all the - # digits of years thereafter". - if len(date_fields[0]) == 2: - date_fields[0] = '19' + date_fields[0] - date_fields = map(string.atoi, date_fields) - EPOCH = 1970 - if date_fields[0] < EPOCH: - raise ValueError, 'invalid year' - timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,)) - - # Parse author - ### NOTE: authors containing whitespace are violations of the - ### RCS specification. We are making an allowance here because - ### CVSNT is known to produce these sorts of authors. - self.ts.match('author') - author = ' '.join(self._read_until_semicolon()) - - # Parse state - self.ts.match('state') - state = '' - while 1: - token = self.ts.get() - if token == ';': - break - state = state + token + ' ' - state = state[:-1] # toss the trailing space - - # Parse branches - self.ts.match('branches') - branches = self._read_until_semicolon() - - # Parse revision of next delta in chain - self.ts.match('next') - next = self.ts.get() - if next == ';': - next = None - else: - self.ts.match(';') - - # there are some files with extra tags in them. for example: - # owner 640; - # group 15; - # permissions 644; - # hardlinks @configure.in@; - # this is "newphrase" in RCSFILE(5). we just want to skip over these. - while 1: - token = self.ts.get() - if token == 'desc' or token[0] in string.digits: - self.ts.unget(token) - break - # consume everything up to the semicolon - self._read_until_semicolon() - - self.sink.define_revision(revision, timestamp, author, state, branches, - next) - - def parse_rcs_tree(self): - while 1: - revision = self.ts.get() - - # End of RCS tree description ? - if revision == 'desc': - self.ts.unget(revision) - return - - self._parse_rcs_tree_entry(revision) - - def parse_rcs_description(self): - self.ts.match('desc') - self.sink.set_description(self.ts.get()) - - def parse_rcs_deltatext(self): - while 1: - revision = self.ts.get() - if revision is None: - # EOF - break - text, sym2, log, sym1 = self.ts.mget(4) - if sym1 != 'log': - print `text[:100], sym2[:100], log[:100], sym1[:100]` - raise RCSExpected(sym1, 'log') - if sym2 != 'text': - raise RCSExpected(sym2, 'text') - ### need to add code to chew up "newphrase" - self.sink.set_revision_info(revision, log, text) - - def parse(self, file, sink): - self.ts = self.stream_class(file) - self.sink = sink - - self.parse_rcs_admin() - - # let sink know when the admin section has been completed - self.sink.admin_completed() - - self.parse_rcs_tree() - - # many sinks want to know when the tree has been completed so they can - # do some work to prep for the arrival of the deltatext - self.sink.tree_completed() - - self.parse_rcs_description() - self.parse_rcs_deltatext() - - # easiest for us to tell the sink it is done, rather than worry about - # higher level software doing it. - self.sink.parse_completed() - - self.ts = self.sink = None - -# -------------------------------------------------------------------------- diff --git a/cvs2svn_rcsparse/debug.py b/cvs2svn_rcsparse/debug.py deleted file mode 100644 index cfeaf2b..0000000 --- a/cvs2svn_rcsparse/debug.py +++ /dev/null @@ -1,122 +0,0 @@ -# -*-python-*- -# -# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved. -# -# By using this file, you agree to the terms and conditions set forth in -# the LICENSE.html file which can be found at the top level of the ViewVC -# distribution or at http://viewvc.org/license-1.html. -# -# For more information, visit http://viewvc.org/ -# -# ----------------------------------------------------------------------- - -"""debug.py: various debugging tools for the rcsparse package.""" - -import time - -from __init__ import parse -import common - - -class DebugSink(common.Sink): - def set_head_revision(self, revision): - print 'head:', revision - - def set_principal_branch(self, branch_name): - print 'branch:', branch_name - - def define_tag(self, name, revision): - print 'tag:', name, '=', revision - - def set_comment(self, comment): - print 'comment:', comment - - def set_description(self, description): - print 'description:', description - - def define_revision(self, revision, timestamp, author, state, - branches, next): - print 'revision:', revision - print ' timestamp:', timestamp - print ' author:', author - print ' state:', state - print ' branches:', branches - print ' next:', next - - def set_revision_info(self, revision, log, text): - print 'revision:', revision - print ' log:', log - print ' text:', text[:100], '...' - - -class DumpSink(common.Sink): - """Dump all the parse information directly to stdout. - - The output is relatively unformatted and untagged. It is intended as a - raw dump of the data in the RCS file. A copy can be saved, then changes - made to the parsing engine, then a comparison of the new output against - the old output. - """ - def __init__(self): - global sha - import sha - - def set_head_revision(self, revision): - print revision - - def set_principal_branch(self, branch_name): - print branch_name - - def define_tag(self, name, revision): - print name, revision - - def set_comment(self, comment): - print comment - - def set_description(self, description): - print description - - def define_revision(self, revision, timestamp, author, state, - branches, next): - print revision, timestamp, author, state, branches, next - - def set_revision_info(self, revision, log, text): - print revision, sha.new(log).hexdigest(), sha.new(text).hexdigest() - - def tree_completed(self): - print 'tree_completed' - - def parse_completed(self): - print 'parse_completed' - - -def dump_file(fname): - parse(open(fname, 'rb'), DumpSink()) - -def time_file(fname): - f = open(fname, 'rb') - s = common.Sink() - t = time.time() - parse(f, s) - t = time.time() - t - print t - -def _usage(): - print 'This is normally a module for importing, but it has a couple' - print 'features for testing as an executable script.' - print 'USAGE: %s COMMAND filename,v' % sys.argv[0] - print ' where COMMAND is one of:' - print ' dump: filename is "dumped" to stdout' - print ' time: filename is parsed with the time written to stdout' - sys.exit(1) - -if __name__ == '__main__': - import sys - if len(sys.argv) != 3: - _usage() - if sys.argv[1] == 'dump': - dump_file(sys.argv[2]) - elif sys.argv[1] == 'time': - time_file(sys.argv[2]) - else: - _usage() diff --git a/cvs2svn_rcsparse/default.py b/cvs2svn_rcsparse/default.py deleted file mode 100644 index 57f9fc6..0000000 --- a/cvs2svn_rcsparse/default.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*-python-*- -# -# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved. -# -# By using this file, you agree to the terms and conditions set forth in -# the LICENSE.html file which can be found at the top level of the ViewVC -# distribution or at http://viewvc.org/license-1.html. -# -# For more information, visit http://viewvc.org/ -# -# ----------------------------------------------------------------------- -# -# This file was originally based on portions of the blame.py script by -# Curt Hagenlocher. -# -# ----------------------------------------------------------------------- - -import string -import common - -class _TokenStream: - token_term = frozenset(string.whitespace + ';:') - - # the algorithm is about the same speed for any CHUNK_SIZE chosen. - # grab a good-sized chunk, but not too large to overwhelm memory. - # note: we use a multiple of a standard block size - CHUNK_SIZE = 192 * 512 # about 100k - -# CHUNK_SIZE = 5 # for debugging, make the function grind... - - def __init__(self, file): - self.rcsfile = file - self.idx = 0 - self.buf = self.rcsfile.read(self.CHUNK_SIZE) - if self.buf == '': - raise RuntimeError, 'EOF' - - def get(self): - "Get the next token from the RCS file." - - # Note: we can afford to loop within Python, examining individual - # characters. For the whitespace and tokens, the number of iterations - # is typically quite small. Thus, a simple iterative loop will beat - # out more complex solutions. - - buf = self.buf - lbuf = len(buf) - idx = self.idx - - while 1: - if idx == lbuf: - buf = self.rcsfile.read(self.CHUNK_SIZE) - if buf == '': - # signal EOF by returning None as the token - del self.buf # so we fail if get() is called again - return None - lbuf = len(buf) - idx = 0 - - if buf[idx] not in string.whitespace: - break - - idx = idx + 1 - - if buf[idx] in ';:': - self.buf = buf - self.idx = idx + 1 - return buf[idx] - - if buf[idx] != '@': - end = idx + 1 - token = '' - while 1: - # find token characters in the current buffer - while end < lbuf and buf[end] not in self.token_term: - end = end + 1 - token = token + buf[idx:end] - - if end < lbuf: - # we stopped before the end, so we have a full token - idx = end - break - - # we stopped at the end of the buffer, so we may have a partial token - buf = self.rcsfile.read(self.CHUNK_SIZE) - lbuf = len(buf) - idx = end = 0 - - self.buf = buf - self.idx = idx - return token - - # a "string" which starts with the "@" character. we'll skip it when we - # search for content. - idx = idx + 1 - - chunks = [ ] - - while 1: - if idx == lbuf: - idx = 0 - buf = self.rcsfile.read(self.CHUNK_SIZE) - if buf == '': - raise RuntimeError, 'EOF' - lbuf = len(buf) - i = string.find(buf, '@', idx) - if i == -1: - chunks.append(buf[idx:]) - idx = lbuf - continue - if i == lbuf - 1: - chunks.append(buf[idx:i]) - idx = 0 - buf = '@' + self.rcsfile.read(self.CHUNK_SIZE) - if buf == '@': - raise RuntimeError, 'EOF' - lbuf = len(buf) - continue - if buf[i + 1] == '@': - chunks.append(buf[idx:i+1]) - idx = i + 2 - continue - - chunks.append(buf[idx:i]) - - self.buf = buf - self.idx = i + 1 - - return ''.join(chunks) - -# _get = get -# def get(self): - token = self._get() - print 'T:', `token` - return token - - def match(self, match): - "Try to match the next token from the input buffer." - - token = self.get() - if token != match: - raise common.RCSExpected(token, match) - - def unget(self, token): - "Put this token back, for the next get() to return." - - # Override the class' .get method with a function which clears the - # overridden method then returns the pushed token. Since this function - # will not be looked up via the class mechanism, it should be a "normal" - # function, meaning it won't have "self" automatically inserted. - # Therefore, we need to pass both self and the token thru via defaults. - - # note: we don't put this into the input buffer because it may have been - # @-unescaped already. - - def give_it_back(self=self, token=token): - del self.get - return token - - self.get = give_it_back - - def mget(self, count): - "Return multiple tokens. 'next' is at the end." - result = [ ] - for i in range(count): - result.append(self.get()) - result.reverse() - return result - - -class Parser(common._Parser): - stream_class = _TokenStream diff --git a/cvs2svn_rcsparse/parse_rcs_file.py b/cvs2svn_rcsparse/parse_rcs_file.py deleted file mode 100644 index 215845d..0000000 --- a/cvs2svn_rcsparse/parse_rcs_file.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/python2 - -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2006-2007 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://cvs2svn.tigris.org/. -# ==================================================================== - -"""Parse an RCS file, showing the rcsparse callbacks that are called. - -This program is useful to see whether an RCS file has a problem (in -the sense of not being parseable by rcsparse) and also to illuminate -the correspondence between RCS file contents and rcsparse callbacks. - -The output of this program can also be considered to be a kind of -'canonical' format for RCS files, at least in so far as rcsparse -returns all relevant information in the file and provided that the -order of callbacks is always the same.""" - - -import sys -import os - - -class Logger: - def __init__(self, f, name): - self.f = f - self.name = name - - def __call__(self, *args): - self.f.write( - '%s(%s)\n' % (self.name, ', '.join(['%r' % arg for arg in args]),) - ) - - -class LoggingSink: - def __init__(self, f): - self.f = f - - def __getattr__(self, name): - return Logger(self.f, name) - - -if __name__ == '__main__': - # Since there is nontrivial logic in __init__.py, we have to import - # parse() via that file. First make sure that the directory - # containing this script is in the path: - sys.path.insert(0, os.path.dirname(sys.argv[0])) - - from __init__ import parse - - if sys.argv[1:]: - for path in sys.argv[1:]: - if os.path.isfile(path) and path.endswith(',v'): - parse( - open(path, 'rb'), LoggingSink(sys.stdout) - ) - else: - sys.stderr.write('%r is being ignored.\n' % path) - else: - parse(sys.stdin, LoggingSink(sys.stdout)) - - diff --git a/cvs2svn_rcsparse/rcparse_redundant_work.patch b/cvs2svn_rcsparse/rcparse_redundant_work.patch deleted file mode 100644 index b574dd2..0000000 --- a/cvs2svn_rcsparse/rcparse_redundant_work.patch +++ /dev/null @@ -1,99 +0,0 @@ -=== modified file 'cvs2svn_rcsparse/default.py' ---- cvs2svn_rcsparse/default.py 2007-11-18 23:05:32 +0000 -+++ cvs2svn_rcsparse/default.py 2010-01-23 10:21:47 +0000 -@@ -19,7 +19,7 @@ - import common - - class _TokenStream: -- token_term = string.whitespace + ';:' -+ token_term = frozenset(string.whitespace + ';:') - - # the algorithm is about the same speed for any CHUNK_SIZE chosen. - # grab a good-sized chunk, but not too large to overwhelm memory. -@@ -44,15 +44,17 @@ - # out more complex solutions. - - buf = self.buf -+ lbuf = len(buf) - idx = self.idx - - while 1: -- if idx == len(buf): -+ if idx == lbuf: - buf = self.rcsfile.read(self.CHUNK_SIZE) - if buf == '': - # signal EOF by returning None as the token - del self.buf # so we fail if get() is called again - return None -+ lbuf = len(buf) - idx = 0 - - if buf[idx] not in string.whitespace: -@@ -60,7 +62,7 @@ - - idx = idx + 1 - -- if buf[idx] == ';' or buf[idx] == ':': -+ if buf[idx] in ';:': - self.buf = buf - self.idx = idx + 1 - return buf[idx] -@@ -70,17 +72,18 @@ - token = '' - while 1: - # find token characters in the current buffer -- while end < len(buf) and buf[end] not in self.token_term: -+ while end < lbuf and buf[end] not in self.token_term: - end = end + 1 - token = token + buf[idx:end] - -- if end < len(buf): -+ if end < lbuf: - # we stopped before the end, so we have a full token - idx = end - break - - # we stopped at the end of the buffer, so we may have a partial token - buf = self.rcsfile.read(self.CHUNK_SIZE) -+ lbuf = len(buf) - idx = end = 0 - - self.buf = buf -@@ -94,22 +97,24 @@ - chunks = [ ] - - while 1: -- if idx == len(buf): -+ if idx == lbuf: - idx = 0 - buf = self.rcsfile.read(self.CHUNK_SIZE) - if buf == '': - raise RuntimeError, 'EOF' -+ lbuf = len(buf) - i = string.find(buf, '@', idx) - if i == -1: - chunks.append(buf[idx:]) -- idx = len(buf) -+ idx = lbuf - continue -- if i == len(buf) - 1: -+ if i == lbuf - 1: - chunks.append(buf[idx:i]) - idx = 0 - buf = '@' + self.rcsfile.read(self.CHUNK_SIZE) - if buf == '@': - raise RuntimeError, 'EOF' -+ lbuf = len(buf) - continue - if buf[i + 1] == '@': - chunks.append(buf[idx:i+1]) -@@ -121,7 +126,7 @@ - self.buf = buf - self.idx = i + 1 - -- return string.join(chunks, '') -+ return ''.join(chunks) - - # _get = get - # def get(self): - diff --git a/cvs2svn_rcsparse/run-tests.py b/cvs2svn_rcsparse/run-tests.py deleted file mode 100644 index eb9c3ea..0000000 --- a/cvs2svn_rcsparse/run-tests.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/python2 - -# (Be in -*- python -*- mode.) -# -# ==================================================================== -# Copyright (c) 2007 CollabNet. All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://subversion.tigris.org/license-1.html. -# If newer versions of this license are posted there, you may use a -# newer version instead, at your option. -# -# This software consists of voluntary contributions made by many -# individuals. For exact contribution history, see the revision -# history and logs, available at http://viewvc.tigris.org/. -# ==================================================================== - -"""Run tests of rcsparse code.""" - -import sys -import os -import glob -from cStringIO import StringIO -from difflib import Differ - -# Since there is nontrivial logic in __init__.py, we have to import -# parse() via that file. First make sure that the directory -# containing this script is in the path: -script_dir = os.path.dirname(sys.argv[0]) -sys.path.insert(0, script_dir) - -from __init__ import parse -from parse_rcs_file import LoggingSink - - -test_dir = os.path.join(script_dir, 'test-data') - -filelist = glob.glob(os.path.join(test_dir, '*,v')) -filelist.sort() - -all_tests_ok = 1 - -for filename in filelist: - sys.stderr.write('%s: ' % (filename,)) - f = StringIO() - try: - parse(open(filename, 'rb'), LoggingSink(f)) - except Exception, e: - sys.stderr.write('Error parsing file: %s!\n' % (e,)) - all_tests_ok = 0 - else: - output = f.getvalue() - - expected_output_filename = filename[:-2] + '.out' - expected_output = open(expected_output_filename, 'rb').read() - - if output == expected_output: - sys.stderr.write('OK\n') - else: - sys.stderr.write('Output does not match expected output!\n') - differ = Differ() - for diffline in differ.compare( - expected_output.splitlines(1), output.splitlines(1) - ): - sys.stderr.write(diffline) - all_tests_ok = 0 - -if all_tests_ok: - sys.exit(0) -else: - sys.exit(1) - diff --git a/cvs2svn_rcsparse/texttools.py b/cvs2svn_rcsparse/texttools.py deleted file mode 100644 index 7c713eb..0000000 --- a/cvs2svn_rcsparse/texttools.py +++ /dev/null @@ -1,348 +0,0 @@ -# -*-python-*- -# -# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved. -# -# By using this file, you agree to the terms and conditions set forth in -# the LICENSE.html file which can be found at the top level of the ViewVC -# distribution or at http://viewvc.org/license-1.html. -# -# For more information, visit http://viewvc.org/ -# -# ----------------------------------------------------------------------- - -import string - -# note: this will raise an ImportError if it isn't available. the rcsparse -# package will recognize this and switch over to the default parser. -from mx import TextTools - -import common - - -# for convenience -_tt = TextTools - -_idchar_list = map(chr, range(33, 127)) + map(chr, range(160, 256)) -_idchar_list.remove('$') -_idchar_list.remove(',') -#_idchar_list.remove('.') # leave as part of 'num' symbol -_idchar_list.remove(':') -_idchar_list.remove(';') -_idchar_list.remove('@') -_idchar = string.join(_idchar_list, '') -_idchar_set = _tt.set(_idchar) - -_onechar_token_set = _tt.set(':;') - -_not_at_set = _tt.invset('@') - -_T_TOKEN = 30 -_T_STRING_START = 40 -_T_STRING_SPAN = 60 -_T_STRING_END = 70 - -_E_COMPLETE = 100 # ended on a complete token -_E_TOKEN = 110 # ended mid-token -_E_STRING_SPAN = 130 # ended within a string -_E_STRING_END = 140 # ended with string-end ('@') (could be mid-@@) - -_SUCCESS = +100 - -_EOF = 'EOF' -_CONTINUE = 'CONTINUE' -_UNUSED = 'UNUSED' - - -# continuation of a token over a chunk boundary -_c_token_table = ( - (_T_TOKEN, _tt.AllInSet, _idchar_set), - ) - -class _mxTokenStream: - - # the algorithm is about the same speed for any CHUNK_SIZE chosen. - # grab a good-sized chunk, but not too large to overwhelm memory. - # note: we use a multiple of a standard block size - CHUNK_SIZE = 192 * 512 # about 100k - -# CHUNK_SIZE = 5 # for debugging, make the function grind... - - def __init__(self, file): - self.rcsfile = file - self.tokens = [ ] - self.partial = None - - self.string_end = None - - def _parse_chunk(self, buf, start=0): - "Get the next token from the RCS file." - - buflen = len(buf) - - assert start < buflen - - # construct a tag table which refers to the buffer we need to parse. - table = ( - #1: ignore whitespace. with or without whitespace, move to the next rule. - (None, _tt.AllInSet, _tt.whitespace_set, +1), - - #2 - (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS), - - #3: accumulate token text and exit, or move to the next rule. - (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2), - - #4 - (_E_TOKEN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -3, _SUCCESS), - - #5: single character tokens exit immediately, or move to the next rule - (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2), - - #6 - (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, -5, _SUCCESS), - - #7: if this isn't an '@' symbol, then we have a syntax error (go to a - # negative index to indicate that condition). otherwise, suck it up - # and move to the next rule. - (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'), - - #8 - (None, _tt.Is, '@', +4, +1), - #9 - (buf, _tt.Is, '@', +1, -1), - #10 - (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1), - #11 - (_E_STRING_END, _tt.EOF + _tt.AppendTagobj, _tt.Here, -10, _SUCCESS), - - #12 - (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS), - - #13: suck up everything that isn't an AT. go to next rule to look for EOF - (buf, _tt.AllInSet, _not_at_set, 0, +1), - - #14: go back to look for double AT if we aren't at the end of the string - (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -6, _SUCCESS), - ) - - # Fast, texttools may be, but it's somewhat lacking in clarity. - # Here's an attempt to document the logic encoded in the table above: - # - # Flowchart: - # _____ - # / /\ - # 1 -> 2 -> 3 -> 5 -> 7 -> 8 -> 9 -> 10 -> 11 - # | \/ \/ \/ /\ \/ - # \ 4 6 12 14 / - # \_______/_____/ \ / / - # \ 13 / - # \__________________________________________/ - # - # #1: Skip over any whitespace. - # #2: If now EOF, exit with code _E_COMPLETE. - # #3: If we have a series of characters in _idchar_set, then: - # #4: Output them as a token, and go back to #1. - # #5: If we have a character in _onechar_token_set, then: - # #6: Output it as a token, and go back to #1. - # #7: If we do not have an '@', then error. - # If we do, then log a _T_STRING_START and continue. - # #8: If we have another '@', continue on to #9. Otherwise: - # #12: If now EOF, exit with code _E_STRING_SPAN. - # #13: Record the slice up to the next '@' (or EOF). - # #14: If now EOF, exit with code _E_STRING_SPAN. - # Otherwise, go back to #8. - # #9: If we have another '@', then we've just seen an escaped - # (by doubling) '@' within an @-string. Record a slice including - # just one '@' character, and jump back to #8. - # Otherwise, we've *either* seen the terminating '@' of an @-string, - # *or* we've seen one half of an escaped @@ sequence that just - # happened to be split over a chunk boundary - in either case, - # we continue on to #10. - # #10: Log a _T_STRING_END. - # #11: If now EOF, exit with _E_STRING_END. Otherwise, go back to #1. - - success, taglist, idx = _tt.tag(buf, table, start) - - if not success: - ### need a better way to report this error - raise common.RCSIllegalCharacter() - assert idx == buflen - - # pop off the last item - last_which = taglist.pop() - - i = 0 - tlen = len(taglist) - while i < tlen: - if taglist[i] == _T_STRING_START: - j = i + 1 - while j < tlen: - if taglist[j] == _T_STRING_END: - s = _tt.join(taglist, '', i+1, j) - del taglist[i:j] - tlen = len(taglist) - taglist[i] = s - break - j = j + 1 - else: - assert last_which == _E_STRING_SPAN - s = _tt.join(taglist, '', i+1) - del taglist[i:] - self.partial = (_T_STRING_SPAN, [ s ]) - break - i = i + 1 - - # figure out whether we have a partial last-token - if last_which == _E_TOKEN: - self.partial = (_T_TOKEN, [ taglist.pop() ]) - elif last_which == _E_COMPLETE: - pass - elif last_which == _E_STRING_SPAN: - assert self.partial - else: - assert last_which == _E_STRING_END - self.partial = (_T_STRING_END, [ taglist.pop() ]) - - taglist.reverse() - taglist.extend(self.tokens) - self.tokens = taglist - - def _set_end(self, taglist, text, l, r, subtags): - self.string_end = l - - def _handle_partial(self, buf): - which, chunks = self.partial - if which == _T_TOKEN: - success, taglist, idx = _tt.tag(buf, _c_token_table) - if not success: - # The start of this buffer was not a token. So the end of the - # prior buffer was a complete token. - self.tokens.insert(0, string.join(chunks, '')) - else: - assert len(taglist) == 1 and taglist[0][0] == _T_TOKEN \ - and taglist[0][1] == 0 and taglist[0][2] == idx - if idx == len(buf): - # - # The whole buffer was one huge token, so we may have a - # partial token again. - # - # Note: this modifies the list of chunks in self.partial - # - chunks.append(buf) - - # consumed the whole buffer - return len(buf) - - # got the rest of the token. - chunks.append(buf[:idx]) - self.tokens.insert(0, string.join(chunks, '')) - - # no more partial token - self.partial = None - - return idx - - if which == _T_STRING_END: - if buf[0] != '@': - self.tokens.insert(0, string.join(chunks, '')) - return 0 - chunks.append('@') - start = 1 - else: - start = 0 - - self.string_end = None - string_table = ( - (None, _tt.Is, '@', +3, +1), - (_UNUSED, _tt.Is + _tt.AppendMatch, '@', +1, -1), - (self._set_end, _tt.Skip + _tt.CallTag, 0, 0, _SUCCESS), - - (None, _tt.EOF, _tt.Here, +1, _SUCCESS), - - # suck up everything that isn't an AT. move to next rule to look - # for EOF - (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _not_at_set, 0, +1), - - # go back to look for double AT if we aren't at the end of the string - (None, _tt.EOF, _tt.Here, -5, _SUCCESS), - ) - - success, unused, idx = _tt.tag(buf, string_table, - start, len(buf), chunks) - - # must have matched at least one item - assert success - - if self.string_end is None: - assert idx == len(buf) - self.partial = (_T_STRING_SPAN, chunks) - elif self.string_end < len(buf): - self.partial = None - self.tokens.insert(0, string.join(chunks, '')) - else: - self.partial = (_T_STRING_END, chunks) - - return idx - - def _parse_more(self): - buf = self.rcsfile.read(self.CHUNK_SIZE) - if not buf: - return _EOF - - if self.partial: - idx = self._handle_partial(buf) - if idx is None: - return _CONTINUE - if idx < len(buf): - self._parse_chunk(buf, idx) - else: - self._parse_chunk(buf) - - return _CONTINUE - - def get(self): - try: - return self.tokens.pop() - except IndexError: - pass - - while not self.tokens: - action = self._parse_more() - if action == _EOF: - return None - - return self.tokens.pop() - - -# _get = get -# def get(self): - token = self._get() - print 'T:', `token` - return token - - def match(self, match): - if self.tokens: - token = self.tokens.pop() - else: - token = self.get() - - if token != match: - raise common.RCSExpected(token, match) - - def unget(self, token): - self.tokens.append(token) - - def mget(self, count): - "Return multiple tokens. 'next' is at the end." - while len(self.tokens) < count: - action = self._parse_more() - if action == _EOF: - ### fix this - raise RuntimeError, 'EOF hit while expecting tokens' - result = self.tokens[-count:] - del self.tokens[-count:] - return result - - -class Parser(common._Parser): - stream_class = _mxTokenStream -- cgit v1.2.3-65-gdbad