diff options
author | David Kalnischkies <david@kalnischkies.de> | 2013-12-06 12:17:48 +0100 |
---|---|---|
committer | David Kalnischkies <david@kalnischkies.de> | 2013-12-13 11:59:49 +0100 |
commit | 47d2bc78adb49f3182f9a3d7a4baea363e772d64 (patch) | |
tree | 0536df9928330c35d98a8c2a41fb24405cae9ba1 /methods/rred.cc | |
parent | 9d39208af5c8c72d3886c70d603921cf427056ee (diff) |
implement POC client-side merging of pdiffs via apt-file
The idea of pdiffs is to avoid downloading the whole file by patching the
existing index. This works very well, but becomes slow if a lot of
patches need to be applied to reconstruct an up-to-date index and in
recent years more and more dinstall (or similar) runs are executed
creating more and more pdiffs in the same amount of time, so pdiffs
became less useful.
The solution is simple: Reduce the number of patches (which are very
small) which need to be applied on top of the index we have available
(which is usually pretty big).
This can be done in two ways: Either merge the patches on the
server-side so that the client has to download only one patch or the
patches are all downloaded and merged on the client-side.
The first needs a client that applies one patch at a time and can also
skip patches if needed (APT has supported this for a long time now).
The latter is implemented by this commit, but depends on the server NOT
merging the patches and the patches being in a strict order in which no
patch is skipped.
This is traditionally the case for dak, but other repository creators
support merging – e.g. reprepro (which helpfully adds a flag indicating
that the patches are merged). To support both, or even a mix of the two, a client
needs more information, which isn't available for now.
This POC uses the external diffindex-rred included in apt-file to
do the heavy lifting of merging & applying all patches in one pass,
hence to test this feature apt-file needs to be installed.
Diffstat (limited to 'methods/rred.cc')
-rw-r--r-- | methods/rred.cc | 128 |
1 files changed, 96 insertions, 32 deletions
diff --git a/methods/rred.cc b/methods/rred.cc index 7c65f8f92..bea8ed263 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -11,6 +11,8 @@ #include <sys/stat.h> #include <sys/uio.h> +#include <sys/types.h> +#include <fcntl.h> #include <unistd.h> #include <utime.h> #include <stdio.h> @@ -465,50 +467,112 @@ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ } else URIStart(Res); - if (Debug == true) - std::clog << "Patching " << Path << " with " << Path - << ".ed and putting result into " << Itm->DestFile << std::endl; - // Open the source and destination files (the d'tor of FileFd will do - // the cleanup/closing of the fds) - FileFd From(Path,FileFd::ReadOnly); - FileFd Patch(Path+".ed",FileFd::ReadOnly, FileFd::Gzip); - FileFd To(Itm->DestFile,FileFd::WriteAtomic); - To.EraseOnFailure(); - if (_error->PendingError() == true) - return false; - + std::string lastPatchName; Hashes Hash; - // now do the actual patching - State const result = patchMMap(Patch, From, To, &Hash); - if (result == MMAP_FAILED) { - // retry with patchFile - Patch.Seek(0); - From.Seek(0); - To.Open(Itm->DestFile,FileFd::WriteAtomic); + + // check for a single ed file + if (FileExists(Path+".ed") == true) + { + if (Debug == true) + std::clog << "Patching " << Path << " with " << Path + << ".ed and putting result into " << Itm->DestFile << std::endl; + + // Open the source and destination files + lastPatchName = Path + ".ed"; + FileFd From(Path,FileFd::ReadOnly); + FileFd To(Itm->DestFile,FileFd::WriteAtomic); + To.EraseOnFailure(); + FileFd Patch(lastPatchName, FileFd::ReadOnly, FileFd::Gzip); if (_error->PendingError() == true) - return false; - if (patchFile(Patch, From, To, &Hash) != ED_OK) { - return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str()); + return false; + + // now do the actual patching + State const result = patchMMap(Patch, From, To, &Hash); + if (result == MMAP_FAILED) { + // retry with 
patchFile + Patch.Seek(0); + From.Seek(0); + To.Open(Itm->DestFile,FileFd::WriteAtomic); + if (_error->PendingError() == true) + return false; + if (patchFile(Patch, From, To, &Hash) != ED_OK) { + return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str()); + } else if (Debug == true) { + std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl; + } + } else if (result != ED_OK) { + return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str()); } else if (Debug == true) { - std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl; + std::clog << "rred: finished mmap patching of " << Path << std::endl; } - } else if (result != ED_OK) { - return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str()); - } else if (Debug == true) { - std::clog << "rred: finished mmap patching of " << Path << std::endl; + + // write out the result + From.Close(); + Patch.Close(); + To.Close(); } + else + { + if (Debug == true) + std::clog << "Patching " << Path << " with all " << Path << ".ed.*.gz files and " + << "putting result into " << Itm->DestFile << std::endl; + + int From = open(Path.c_str(), O_RDONLY); + unlink(Itm->DestFile.c_str()); + int To = open(Itm->DestFile.c_str(), O_WRONLY | O_CREAT | O_EXCL, 0644); + SetCloseExec(From, false); + SetCloseExec(To, false); + + _error->PushToStack(); + std::vector<std::string> patches = GetListOfFilesInDir(flNotFile(Path), "gz", true, false); + _error->RevertToStack(); + + std::string externalrred = _config->Find("Dir::Bin::rred", "/usr/bin/diffindex-rred"); + std::vector<const char *> Args; + Args.reserve(22); + Args.push_back(externalrred.c_str()); + + std::string const baseName = Path + ".ed."; + for 
(std::vector<std::string>::const_iterator p = patches.begin(); + p != patches.end(); ++p) + if (p->compare(0, baseName.length(), baseName) == 0) + Args.push_back(p->c_str()); + + Args.push_back(NULL); + + pid_t Patcher = ExecFork(); + if (Patcher == 0) { + dup2(From, STDIN_FILENO); + dup2(To, STDOUT_FILENO); + + execvp(Args[0], (char **) &Args[0]); + std::cerr << "Failed to execute patcher " << Args[0] << "!" << std::endl; + _exit(100); + } + // last is NULL, so the one before is the last patch + lastPatchName = Args[Args.size() - 2]; - // write out the result - From.Close(); - Patch.Close(); - To.Close(); + if (ExecWait(Patcher, "rred") == false) + return _error->Errno("rred", "Patching via external rred failed"); + + close(From); + close(To); + + struct stat Buf; + if (stat(Itm->DestFile.c_str(), &Buf) != 0) + return _error->Errno("stat",_("Failed to stat")); + + To = open(Path.c_str(), O_RDONLY); + Hash.AddFD(To, Buf.st_size); + close(To); + } /* Transfer the modification times from the patch file to be able to see in which state the file should be and use the access time from the "old" file */ struct stat BufBase, BufPatch; if (stat(Path.c_str(),&BufBase) != 0 || - stat(std::string(Path+".ed").c_str(),&BufPatch) != 0) + stat(lastPatchName.c_str(), &BufPatch) != 0) return _error->Errno("stat",_("Failed to stat")); struct utimbuf TimeBuf; |