diff options
author | David Kalnischkies <david@kalnischkies.de> | 2013-12-06 12:17:48 +0100 |
---|---|---|
committer | David Kalnischkies <david@kalnischkies.de> | 2013-12-13 11:59:49 +0100 |
commit | 47d2bc78adb49f3182f9a3d7a4baea363e772d64 (patch) | |
tree | 0536df9928330c35d98a8c2a41fb24405cae9ba1 /apt-pkg | |
parent | 9d39208af5c8c72d3886c70d603921cf427056ee (diff) |
implement POC client-side merging of pdiffs via apt-file
The idea of pdiffs is to avoid downloading the hole file by patching the
existing index. This works very well, but becomes slow if a lot of
patches needs to be applied to reconstruct an up-to-date index and in
recent years more and more dinstall (or similar) runs are executed
creating more and more pdiffs in the same amount of time, so pdiffs
became less useful.
The solution is simple: Reduce the amount of patches (which are very
small) which need to be applied on top of the index we have available
(which is usually pretty big).
This can be done in two ways: Either merge the patches on the
server-side so that the client has to download only one patch or the
patches are all downloaded and merged on the client-side.
The first needs a client who is doing one step at a time who can also
skip patches if it needs (APT supports this for a long time now).
The later is implemented by this commit, but depends on the server NOT
merging the patches and the patches being in a strict order in which no
patch is skipped.
This is traditionally the case for dak, but other repository creators
support merging – e.g. reprepro (which helpfully adds a flag indicating
that the patches are merged). To support both or even mixes a client
needs more information which isn't available for now.
This POC uses the external diffindex-rred included in apt-file to
do the heavy lifting of merging & applying all patches in one pass,
hence to test this feature apt-file needs to be installed.
Diffstat (limited to 'apt-pkg')
-rw-r--r-- | apt-pkg/acquire-item.cc | 151 | ||||
-rw-r--r-- | apt-pkg/acquire-item.h | 100 |
2 files changed, 247 insertions, 4 deletions
diff --git a/apt-pkg/acquire-item.cc b/apt-pkg/acquire-item.cc index 009531c2e..b5b9577ef 100644 --- a/apt-pkg/acquire-item.cc +++ b/apt-pkg/acquire-item.cc @@ -498,14 +498,42 @@ bool pkgAcqDiffIndex::ParseDiffIndex(string IndexDiffFile) /*{{{*/ } // we have something, queue the next diff - if(found) + if(found) { // queue the diffs string::size_type const last_space = Description.rfind(" "); if(last_space != string::npos) Description.erase(last_space, Description.size()-last_space); - new pkgAcqIndexDiffs(Owner, RealURI, Description, Desc.ShortDesc, - ExpectedHash, ServerSha1, available_patches); + + /* decide if we should download patches one by one or in one go: + The first is good if the server merges patches, but many don't so client + based merging can be attempt in which case the second is better. + "bad things" will happen if patches are merged on the server, + but client side merging is attempt as well */ + bool pdiff_merge = _config->FindB("Acquire::PDiffs::Merge", true); + if (pdiff_merge == true) + { + // this perl script is provided by apt-file + pdiff_merge = FileExists(_config->FindFile("Dir::Bin::rred", "/usr/bin/diffindex-rred")); + if (pdiff_merge == true) + { + // reprepro adds this flag if it has merged patches on the server + std::string const precedence = Tags.FindS("X-Patch-Precedence"); + pdiff_merge = (precedence != "merged"); + } + } + + if (pdiff_merge == false) + new pkgAcqIndexDiffs(Owner, RealURI, Description, Desc.ShortDesc, + ExpectedHash, ServerSha1, available_patches); + else + { + std::vector<pkgAcqIndexMergeDiffs*> *diffs = new std::vector<pkgAcqIndexMergeDiffs*>(available_patches.size()); + for(size_t i = 0; i < available_patches.size(); ++i) + (*diffs)[i] = new pkgAcqIndexMergeDiffs(Owner, RealURI, Description, Desc.ShortDesc, ExpectedHash, + available_patches[i], diffs); + } + Complete = false; Status = StatDone; Dequeue(); @@ -754,6 +782,123 @@ void pkgAcqIndexDiffs::Done(string Message,unsigned long long Size,string Md5Has } } /*}}}*/ +// AcqIndexMergeDiffs::AcqIndexMergeDiffs - Constructor /*{{{*/ +pkgAcqIndexMergeDiffs::pkgAcqIndexMergeDiffs(pkgAcquire *Owner, + string const &URI, string const &URIDesc, + string const &ShortDesc, HashString const &ExpectedHash, + DiffInfo const &patch, + std::vector<pkgAcqIndexMergeDiffs*> const * const allPatches) + : Item(Owner), RealURI(URI), ExpectedHash(ExpectedHash), + patch(patch),allPatches(allPatches), State(StateFetchDiff) +{ + + DestFile = _config->FindDir("Dir::State::lists") + "partial/"; + DestFile += URItoFileName(URI); + + Debug = _config->FindB("Debug::pkgAcquire::Diffs",false); + + Description = URIDesc; + Desc.Owner = this; + Desc.ShortDesc = ShortDesc; + + Desc.URI = string(RealURI) + ".diff/" + patch.file + ".gz"; + Desc.Description = Description + " " + patch.file + string(".pdiff"); + DestFile = _config->FindDir("Dir::State::lists") + "partial/"; + DestFile += URItoFileName(RealURI + ".diff/" + patch.file); + + if(Debug) + std::clog << "pkgAcqIndexMergeDiffs: " << Desc.URI << std::endl; + + QueueURI(Desc); +} + /*}}}*/ +void pkgAcqIndexMergeDiffs::Failed(string Message,pkgAcquire::MethodConfig *Cnf)/*{{{*/ +{ + if(Debug) + std::clog << "pkgAcqIndexMergeDiffs failed: " << Desc.URI << " with " << Message << std::endl; + Complete = false; + Status = StatDone; + Dequeue(); + + // check if we are the first to fail, otherwise we are done here + State = StateDoneDiff; + for (std::vector<pkgAcqIndexMergeDiffs *>::const_iterator I = allPatches->begin(); + I != allPatches->end(); ++I) + if ((*I)->State == StateErrorDiff) + return; + + // first failure means we should fallback + State = StateErrorDiff; + std::clog << "Falling back to normal index file aquire" << std::endl; + new pkgAcqIndex(Owner, RealURI, Description,Desc.ShortDesc, + ExpectedHash); +} + /*}}}*/ +void pkgAcqIndexMergeDiffs::Done(string Message,unsigned long long Size,string Md5Hash, /*{{{*/ + pkgAcquire::MethodConfig *Cnf) +{ + if(Debug) + std::clog << "pkgAcqIndexMergeDiffs::Done(): " << Desc.URI << std::endl; + + Item::Done(Message,Size,Md5Hash,Cnf); + + string const FinalFile = _config->FindDir("Dir::State::lists") + URItoFileName(RealURI); + + if (State == StateFetchDiff) + { + // rred expects the patch as $FinalFile.ed.$patchname.gz + Rename(DestFile, FinalFile + ".ed." + patch.file + ".gz"); + + // check if this is the last completed diff + State = StateDoneDiff; + for (std::vector<pkgAcqIndexMergeDiffs *>::const_iterator I = allPatches->begin(); + I != allPatches->end(); ++I) + if ((*I)->State != StateDoneDiff) + { + if(Debug) + std::clog << "Not the last done diff in the batch: " << Desc.URI << std::endl; + return; + } + + // this is the last completed diff, so we are ready to apply now + State = StateApplyDiff; + + if(Debug) + std::clog << "Sending to rred method: " << FinalFile << std::endl; + + Local = true; + Desc.URI = "rred:" + FinalFile; + QueueURI(Desc); + Mode = "rred"; + return; + } + // success in download/apply all diffs, clean up + else if (State == StateApplyDiff) + { + // see if we really got the expected file + if(!ExpectedHash.empty() && !ExpectedHash.VerifyFile(DestFile)) + { + RenameOnError(HashSumMismatch); + return; + } + + // move the result into place + if(Debug) + std::clog << "Moving patched file in place: " << std::endl + << DestFile << " -> " << FinalFile << std::endl; + Rename(DestFile, FinalFile); + chmod(FinalFile.c_str(), 0644); + + // otherwise lists cleanup will eat the file + DestFile = FinalFile; + + // all set and done + Complete = true; + if(Debug) + std::clog << "allDone: " << DestFile << "\n" << std::endl; + } +} + /*}}}*/ // AcqIndex::AcqIndex - Constructor /*{{{*/ // --------------------------------------------------------------------- /* The package file is added to the queue and a second class is diff --git a/apt-pkg/acquire-item.h b/apt-pkg/acquire-item.h index 6b4f73708..5a1c7979c 100644 --- a/apt-pkg/acquire-item.h +++ b/apt-pkg/acquire-item.h @@ -429,7 +429,105 @@ class pkgAcqDiffIndex : public pkgAcquire::Item std::string ShortDesc, HashString ExpectedHash); }; /*}}}*/ -/** \brief An item that is responsible for fetching all the patches {{{ +/** \brief An item that is responsible for fetching client-merge patches {{{ + * that need to be applied to a given package index file. + * + * Instead of downloading and applying each patch one by one like its + * sister #pkgAcqIndexDiffs this class will download all patches at once + * and call rred with all the patches downloaded once. Rred will then + * merge and apply them in one go, which should be a lot faster – but is + * incompatible with server-based merges of patches like reprepro can do. + * + * \sa pkgAcqDiffIndex, pkgAcqIndex + */ +class pkgAcqIndexMergeDiffs : public pkgAcquire::Item +{ + protected: + + /** \brief If \b true, debugging output will be written to + * std::clog. + */ + bool Debug; + + /** \brief description of the item that is currently being + * downloaded. + */ + pkgAcquire::ItemDesc Desc; + + /** \brief URI of the package index file that is being + * reconstructed. + */ + std::string RealURI; + + /** \brief HashSum of the package index file that is being + * reconstructed. + */ + HashString ExpectedHash; + + /** \brief description of the file being downloaded. */ + std::string Description; + + /** \brief information about the current patch */ + struct DiffInfo const patch; + + /** \brief list of all download items for the patches */ + std::vector<pkgAcqIndexMergeDiffs*> const * const allPatches; + + /** The current status of this patch. */ + enum DiffState + { + /** \brief The diff is currently being fetched. */ + StateFetchDiff, + + /** \brief The diff is currently being applied. */ + StateApplyDiff, + + /** \brief the work with this diff is done */ + StateDoneDiff, + + /** \brief something bad happened and fallback was triggered */ + StateErrorDiff + } State; + + public: + /** \brief Called when the patch file failed to be downloaded. + * + * This method will fall back to downloading the whole index file + * outright; its arguments are ignored. + */ + virtual void Failed(std::string Message,pkgAcquire::MethodConfig *Cnf); + + virtual void Done(std::string Message,unsigned long long Size,std::string Md5Hash, + pkgAcquire::MethodConfig *Cnf); + virtual std::string DescURI() {return RealURI + "Index";}; + + /** \brief Create an index merge-diff item. + * + * \param Owner The pkgAcquire object that owns this item. + * + * \param URI The URI of the package index file being + * reconstructed. + * + * \param URIDesc A long description of this item. + * + * \param ShortDesc A brief description of this item. + * + * \param ExpectedHash The expected md5sum of the completely + * reconstructed package index file; the index file will be tested + * against this value when it is entirely reconstructed. + * + * \param patch contains infos about the patch this item is supposed + * to download which were read from the index + * + * \param allPatches contains all related items so that each item can + * check if it was the last one to complete the download step + */ + pkgAcqIndexMergeDiffs(pkgAcquire *Owner,std::string const &URI,std::string const &URIDesc, + std::string const &ShortDesc, HashString const &ExpectedHash, + DiffInfo const &patch, std::vector<pkgAcqIndexMergeDiffs*> const * const allPatches); +}; + /*}}}*/ +/** \brief An item that is responsible for fetching server-merge patches {{{ * that need to be applied to a given package index file. * * After downloading and applying a single patch, this item will |