diff options
author | Michael Vogt <mvo@ubuntu.com> | 2015-09-04 23:29:38 +0200 |
---|---|---|
committer | Michael Vogt <mvo@ubuntu.com> | 2015-09-04 23:29:38 +0200 |
commit | 7852873a1347fcab50393b545cc1e6edd65531c8 (patch) | |
tree | 73cfb2912e6676f8a36b6d28c0599175233035cc | |
parent | c7609dd7a418428ffbca4c81a7950c4f53c92450 (diff) |
Add support for writing by-hash dirs in apt-ftparchive
This feature is controlled by the APT::FTPArchive::DoByHash switch
(enabled by default). It also honors the APT::FTPArchive::By-Hash-Keep
option, which controls how many generations of by-hash files are
kept (defaults to 3, including the current one).
Merged from https://github.com/mvo5/apt/tree/feature/apt-ftparchive-by-hash
-rw-r--r-- | ftparchive/byhash.cc | 63 | ||||
-rw-r--r-- | ftparchive/byhash.h | 23 | ||||
-rw-r--r-- | ftparchive/makefile | 2 | ||||
-rw-r--r-- | ftparchive/writer.cc | 67 | ||||
-rwxr-xr-x | test/integration/test-apt-ftparchive-by-hash | 66 |
5 files changed, 219 insertions, 2 deletions
diff --git a/ftparchive/byhash.cc b/ftparchive/byhash.cc new file mode 100644 index 000000000..04f8f1629 --- /dev/null +++ b/ftparchive/byhash.cc @@ -0,0 +1,63 @@ +// -*- mode: cpp; mode: fold -*- +// Description /*{{{*/ +/* ###################################################################### + + ByHash + + ByHash helper functions + + ##################################################################### */ + /*}}}*/ +// Include Files /*{{{*/ +#include <config.h> + +#include<algorithm> +#include<string> + +#include <unistd.h> +#include <sys/stat.h> + +#include <apt-pkg/fileutl.h> +#include <apt-pkg/hashes.h> +#include "byhash.h" + +// Delete all files in a directory except the most recent N ones +void DeleteAllButMostRecent(std::string dir, int KeepFiles) +{ + struct Cmp { + bool operator() (const std::string& lhs, const std::string& rhs) { + struct stat buf_l, buf_r; + stat(lhs.c_str(), &buf_l); + stat(rhs.c_str(), &buf_r); + if (buf_l.st_mtim.tv_sec == buf_r.st_mtim.tv_sec) + return buf_l.st_mtim.tv_nsec < buf_r.st_mtim.tv_nsec; + return buf_l.st_mtim.tv_sec < buf_r.st_mtim.tv_sec; + } + }; + + if (!DirectoryExists(dir)) + return; + + auto files = GetListOfFilesInDir(dir, false); + std::sort(files.begin(), files.end(), Cmp()); + + for (auto I=files.begin(); I<files.end()-KeepFiles; I++) { + unlink((*I).c_str()); + } +} + +// Takes a input filename (e.g. 
binary-i386/Packages) and a hashstring +// of the Input data and transforms it into a suitable by-hash filename +std::string GenByHashFilename(std::string Input, HashString h) +{ + std::string ByHashOutputFile = Input; + std::string const ByHash = "/by-hash/" + h.HashType() + "/" + h.HashValue(); + size_t trailing_slash = ByHashOutputFile.find_last_of("/"); + if (trailing_slash == std::string::npos) + trailing_slash = 0; + ByHashOutputFile = ByHashOutputFile.replace( + trailing_slash, + ByHashOutputFile.substr(trailing_slash+1).size()+1, + ByHash); + return ByHashOutputFile; +} diff --git a/ftparchive/byhash.h b/ftparchive/byhash.h new file mode 100644 index 000000000..ce05397ad --- /dev/null +++ b/ftparchive/byhash.h @@ -0,0 +1,23 @@ +// -*- mode: cpp; mode: fold -*- +// Description /*{{{*/ +/* ###################################################################### + + ByHash + + ByHash helper functions + + ##################################################################### */ + /*}}}*/ +#ifndef BYHASH_H +#define BYHASH_H + +class HashString; + +// Delete all files in "dir" except for the number specified in "KeepFiles" +// that are the most recent ones +void DeleteAllButMostRecent(std::string dir, int KeepFiles); + +// takes a regular input filename +std::string GenByHashFilename(std::string Input, HashString h); + +#endif diff --git a/ftparchive/makefile b/ftparchive/makefile index e67272e1e..c80487c3f 100644 --- a/ftparchive/makefile +++ b/ftparchive/makefile @@ -12,7 +12,7 @@ PROGRAM=apt-ftparchive SLIBS = -lapt-pkg -lapt-inst -lapt-private $(BDBLIB) $(INTLLIBS) LIB_MAKES = apt-pkg/makefile apt-inst/makefile apt-private/makefile SOURCE = apt-ftparchive.cc cachedb.cc writer.cc contents.cc override.cc \ - multicompress.cc sources.cc + multicompress.cc sources.cc byhash.cc include $(PROGRAM_H) else PROGRAM=apt-ftparchive diff --git a/ftparchive/writer.cc b/ftparchive/writer.cc index 7f09a3758..82049836a 100644 --- a/ftparchive/writer.cc +++ 
b/ftparchive/writer.cc @@ -40,11 +40,13 @@ #include <sstream> #include <memory> #include <utility> +#include <algorithm> #include "apt-ftparchive.h" #include "writer.h" #include "cachedb.h" #include "multicompress.h" +#include "byhash.h" #include <apti18n.h> /*}}}*/ @@ -1018,7 +1020,9 @@ ReleaseWriter::ReleaseWriter(FileFd * const GivenOutput, string const &/*DB*/) : Fields["Architectures"] = ""; Fields["Components"] = ""; Fields["Description"] = ""; - + if (_config->FindB("APT::FTPArchive::DoByHash", true) == true) + Fields["Acquire-By-Hash"] = "true"; + for(map<string,string>::const_iterator I = Fields.begin(); I != Fields.end(); ++I) @@ -1070,6 +1074,31 @@ bool ReleaseWriter::DoPackage(string FileName) CheckSums[NewFileName].Hashes = hs.GetHashStringList(); fd.Close(); + // FIXME: wrong layer in the code(?) + // FIXME2: symlink instead of create a copy + if (_config->FindB("APT::FTPArchive::DoByHash", true) == true) + { + std::string Input = FileName; + HashStringList hsl = hs.GetHashStringList(); + for(HashStringList::const_iterator h = hsl.begin(); + h != hsl.end(); ++h) + { + if (!h->usable()) + continue; + std::string ByHashOutputFile = GenByHashFilename(Input, *h); + + std::string ByHashOutputDir = flNotFile(ByHashOutputFile); + if(!CreateDirectory(flNotFile(Input), ByHashOutputDir)) + return _error->Warning("can not create dir %s", flNotFile(ByHashOutputFile).c_str()); + + // write new hashes + FileFd In(Input, FileFd::ReadOnly); + FileFd Out(ByHashOutputFile, FileFd::WriteEmpty); + if(!CopyFile(In, Out)) + return _error->Warning("failed to copy %s %s", Input.c_str(), ByHashOutputFile.c_str()); + } + } + return true; } @@ -1107,4 +1136,40 @@ void ReleaseWriter::Finish() printChecksumTypeRecord(*Output, "SHA256", CheckSums); if ((DoHashes & Hashes::SHA512SUM) == Hashes::SHA512SUM) printChecksumTypeRecord(*Output, "SHA512", CheckSums); + + // go by-hash cleanup + map<string,ReleaseWriter::CheckSum>::const_iterator prev = CheckSums.begin(); + if 
(_config->FindB("APT::FTPArchive::DoByHash", true) == true) + { + for(map<string,ReleaseWriter::CheckSum>::const_iterator I = CheckSums.begin(); + I != CheckSums.end(); ++I) + { + if (I->first == "Release" || I->first == "InRelease") + continue; + + // keep iterating until we find a new subdir + if(flNotFile(I->first) == flNotFile(prev->first)) + continue; + + // clean that subdir up + int keepFiles = _config->FindI("APT::FTPArchive::By-Hash-Keep", 3); + // calculate how many compressors are used (the amount of files + // in that subdir generated for this run) + keepFiles *= std::distance(prev, I); + prev = I; + + HashStringList hsl = prev->second.Hashes; + for(HashStringList::const_iterator h = hsl.begin(); + h != hsl.end(); ++h) + { + + if (!h->usable()) + continue; + + std::string RealFilename = DirStrip+"/"+prev->first; + std::string ByHashOutputFile = GenByHashFilename(RealFilename, *h); + DeleteAllButMostRecent(flNotFile(ByHashOutputFile), keepFiles); + } + } + } } diff --git a/test/integration/test-apt-ftparchive-by-hash b/test/integration/test-apt-ftparchive-by-hash new file mode 100755 index 000000000..6cda0e415 --- /dev/null +++ b/test/integration/test-apt-ftparchive-by-hash @@ -0,0 +1,66 @@ +#!/bin/sh +set -e + +verify_by_hash() { + for hash_gen in SHA1:sha1sum SHA256:sha256sum SHA512:sha512sum; do + hash=$(echo ${hash_gen} | cut -f1 -d:) + gen=$(echo ${hash_gen} | cut -f2 -d:) + testsuccess stat aptarchive/dists/unstable/main/binary-i386/by-hash/$hash/$($gen aptarchive/dists/unstable/main/binary-i386/Packages | cut -f1 -d' ') + testsuccess stat aptarchive/dists/unstable/main/binary-i386/by-hash/$hash/$($gen aptarchive/dists/unstable/main/binary-i386/Packages.gz | cut -f1 -d' ') + done +} + +# +# main() +# +TESTDIR=$(readlink -f $(dirname $0)) +. $TESTDIR/framework +setupenvironment +configarchitecture 'i386' +configcompression 'gz' '.' 
+ +# build one pacakge +buildsimplenativepackage 'foo' 'i386' '1' 'unstable' +buildaptarchivefromincoming + +# verify initial run +verify_by_hash +previous_hash=$(sha256sum aptarchive/dists/unstable/main/binary-i386/Packages | cut -f1 -d' ') + +# insert new package +buildsimplenativepackage 'bar' 'i386' '1' 'unstable' +# and build again +buildaptarchivefromincoming + +# ensure the new package packag is there +testsuccess zgrep "Package: bar" aptarchive/dists/unstable/main/binary-i386/Packages.gz + +# ensure we have the by-hash stuff +verify_by_hash + +# ensure the old hash is still there +testsuccess stat aptarchive/dists/unstable/main/binary-i386/by-hash/SHA256/$previous_hash + +# ensure we have it in the Release file +testsuccess grep "Acquire-By-Hash: true" aptarchive/dists/unstable/*Release + +# now ensure gc work +for i in $(seq 3); do + buildsimplenativepackage 'bar' 'i386' "$i" 'unstable' + buildaptarchivefromincoming +done + +hash_count=$(ls aptarchive/dists/unstable/main/binary-i386/by-hash/SHA256/|wc -l) +# we have 2 files (uncompressed, gz) per run, 5 runs in total +# by default apt-ftparchive keeps three generations (current plus 2 older) +msgtest "Check that gc for by-hash works… " +if [ "$hash_count" = "6" ]; then + msgpass +else + echo "Got $hash_count expected 6" + msgfail +fi + +# ensure the current generation is still there +verify_by_hash + |