From 9bd27033c4786fa89cebc9d090ad2c6e8f47b598 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Sat, 6 Mar 2021 15:02:26 +0100 Subject: Allow merging with empty pdiff patches There isn't a lot of sense in working on empty patches as they change nothing (quite literally), but they can be the result of merging multiple patches and so to not require our users to specifically detect and remove them, we can be nice and just ignore them instead of erroring out. --- methods/rred.cc | 6 +++++- test/integration/test-method-rred | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/methods/rred.cc b/methods/rred.cc index f53f05ad5..3453bd3bc 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -441,8 +441,12 @@ class Patch { bool cmdwanted = true; Change ch(std::numeric_limits::max()); - if (f.ReadLine(buffer, sizeof(buffer)) == NULL) + if (f.ReadLine(buffer, sizeof(buffer)) == nullptr) + { + if (f.Eof()) + return true; return _error->Error("Reading first line of patchfile %s failed", f.Name().c_str()); + } do { if (h != NULL) h->Add(buffer); diff --git a/test/integration/test-method-rred b/test/integration/test-method-rred index 645b0dfdd..5a42b8417 100755 --- a/test/integration/test-method-rred +++ b/test/integration/test-method-rred @@ -146,6 +146,7 @@ testrred 'Multi line change' 'more' '5,7c - even more good stuff - bonus good stuff $(tail -n 12 ./Packages)" +testrred 'Patch file' 'empty' '' "$(cat ./Packages)" failrred() { msgtest 'Failure caused by' "$1" @@ -161,7 +162,6 @@ failrred 'Bogus content' ' ' # not a problem per-se, but we want our parser to be really strict -failrred 'Empty patch file' '' failrred 'Empty line patch file' ' ' failrred 'Empty line before command' ' @@ -222,7 +222,8 @@ createpatch 'Remove more stuff and fix later' '23d, testsuccess apthelper cat-file --compress gzip Packages.ed-1 mv rootdir/tmp/testsuccess.output Packages.ed-1.gz testsuccess rm Packages.ed-1 -createpatch 'Remove (old) dog paragraph' '10,19d' > Packages.ed-2 +touch Packages.ed-2 # an empty patch +createpatch 'Remove (old) dog paragraph' '10,19d' > Packages.ed-3 mergepatches '11,19c Package: extra-kittens Version: unavailable -- cgit v1.2.3-18-g5258 From 246f66561e23911b9615bd337b3b6f6f25b6cd31 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Sat, 6 Mar 2021 19:55:09 +0100 Subject: Rename pdiff merge patches only after they are all downloaded The rred method expects the patches to have a certain name, which we have to rename the file to before calling the method, but by delaying the rename we ensure that if the download of one of them fails and a successful fallback occurs they are all properly cleaned up as no longer useful while in the error case the next apt run can potentially pick them up as already downloaded. Our test-pdiff-usage test was encountering this every other run, but did not fail as the check for unaccounted files in partial/ was wrapped in a subshell so that the failure produced failing output, but did not change the exit code. --- apt-pkg/acquire-item.cc | 9 ++++----- test/integration/framework | 12 ++++++------ test/integration/test-pdiff-usage | 5 +++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/apt-pkg/acquire-item.cc b/apt-pkg/acquire-item.cc index c9e81070b..6dc424426 100644 --- a/apt-pkg/acquire-item.cc +++ b/apt-pkg/acquire-item.cc @@ -3050,14 +3050,11 @@ void pkgAcqIndexMergeDiffs::Done(string const &Message, HashStringList const &Ha State = StateErrorDiff; return; } - std::string const PatchFile = GetMergeDiffsPatchFileName(UnpatchedFile, patch.file); std::string const PatchedFile = GetKeepCompressedFileName(UncompressedUnpatchedFile, Target); switch (State) { case StateFetchDiff: - Rename(DestFile, PatchFile); - // check if this is the last completed diff State = StateDoneDiff; for (std::vector::const_iterator I = allPatches->begin(); @@ -3068,6 +3065,8 @@ void pkgAcqIndexMergeDiffs::Done(string const &Message, HashStringList const &Ha std::clog << "Not the last done diff in the batch: " << Desc.URI << std::endl; return; } + for (auto * diff : *allPatches) + Rename(diff->DestFile, GetMergeDiffsPatchFileName(UnpatchedFile, diff->patch.file)); // this is the last completed diff, so we are ready to apply now DestFile = GetKeepCompressedFileName(UncompressedUnpatchedFile + "-patched", Target); if(Debug) @@ -3098,8 +3097,8 @@ void pkgAcqIndexMergeDiffs::Done(string const &Message, HashStringList const &Ha if(Debug) std::clog << "allDone: " << DestFile << "\n" << std::endl; return; - case StateDoneDiff: _error->Fatal("Done called for %s which is in an invalid Done state", PatchFile.c_str()); break; - case StateErrorDiff: _error->Fatal("Done called for %s which is in an invalid Error state", PatchFile.c_str()); break; + case StateDoneDiff: _error->Fatal("Done called for %s which is in an invalid Done state", patch.file.c_str()); break; + case StateErrorDiff: _error->Fatal("Done called for %s which is in an invalid Error state", patch.file.c_str()); break; } } /*}}}*/ diff --git a/test/integration/framework b/test/integration/framework index 696fcd8cd..cfde80329 100644 --- a/test/integration/framework +++ b/test/integration/framework @@ -2126,6 +2126,9 @@ aptautotest() { fi } +cdfind() { + ( cd /; find "$@" ) +} aptautotest_aptget_update() { local TESTCALL="$1" while [ -n "$2" ]; do @@ -2135,24 +2138,21 @@ aptautotest_aptget_update() { if ! test -d "${TMPWORKINGDIRECTORY}/rootdir/var/lib/apt/lists"; then return; fi testfilestats "${TMPWORKINGDIRECTORY}/rootdir/var/lib/apt" '%U:%G:%a' '=' "${TEST_DEFAULT_USER}:${TEST_DEFAULT_GROUP}:755" testfilestats "${TMPWORKINGDIRECTORY}/rootdir/var/lib/apt/lists" '%U:%G:%a' '=' "${TEST_DEFAULT_USER}:${TEST_DEFAULT_GROUP}:755" - ( - cd / # all copied files are properly chmodded local backupIFS="$IFS" IFS="$(printf "\n\b")" - find "${TMPWORKINGDIRECTORY}/rootdir/var/lib/apt/lists" -type f ! -name 'lock' | while read file; do + cdfind "${TMPWORKINGDIRECTORY}/rootdir/var/lib/apt/lists" -type f ! -name 'lock' | while read file; do testfilestats "$file" '%U:%G:%a' '=' "${TEST_DEFAULT_USER}:${TEST_DEFAULT_GROUP}:644" done IFS="$backupIFS" if [ "$TESTCALL" = 'testsuccess' ]; then # failure cases can retain partial files and such - testempty find "${TMPWORKINGDIRECTORY}/rootdir/var/lib/apt/lists/partial" -mindepth 1 ! \( -name 'lock' -o -name '*.FAILED' \) + testempty cdfind "${TMPWORKINGDIRECTORY}/rootdir/var/lib/apt/lists/partial" -mindepth 1 ! \( -name 'lock' -o -name '*.FAILED' \) fi if [ -s "${TMPWORKINGDIRECTORY}/rootdir/var/log/aptgetupdate.before.lst" ]; then testfileequal "${TMPWORKINGDIRECTORY}/rootdir/var/log/aptgetupdate.before.lst" \ - "$(find "${TMPWORKINGDIRECTORY}/aptarchive/dists" -type f | while read line; do stat --format '%U:%G:%a:%n' "$line"; done | sort)" + "$(cdfind "${TMPWORKINGDIRECTORY}/aptarchive/dists" -type f | while read line; do stat --format '%U:%G:%a:%n' "$line"; done | sort)" fi - ) } aptautotest_apt_update() { aptautotest_aptget_update "$@"; } aptautotest_aptcdrom_add() { aptautotest_aptget_update "$@"; } diff --git a/test/integration/test-pdiff-usage b/test/integration/test-pdiff-usage index c5726dd08..5844619b8 100755 --- a/test/integration/test-pdiff-usage +++ b/test/integration/test-pdiff-usage @@ -254,7 +254,7 @@ SHA256-Download: # This should work in at least 4% of the cases... for i in $(seq 25); do testfailure apt update "$@" - if ! grep 'rred:600' rootdir/tmp/testfailure.output; then + if ! grep -q 'rred:600' rootdir/tmp/testfailure.output; then break fi done @@ -281,7 +281,8 @@ SHA256-Download: rm "${PATCHFILE}.gz" testsuccess apt update "$@" cp rootdir/tmp/testsuccess.output patchdownload.output - testsuccess grep '^Falling back to normal index file acquire' patchdownload.output + # it should be anchored on line start, but multiple processes on the same output stream… + testsuccess grep 'Falling back to normal index file acquire' patchdownload.output testnopackage oldstuff testsuccessequal "$(cat Packages-future) " aptcache show apt newstuff futurestuff -- cgit v1.2.3-18-g5258 From 59933938f51105066161a6eb88253006826336a2 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Sun, 7 Mar 2021 00:47:26 +0100 Subject: Start pdiff patching from the last possible starting point Especially in small sections of an archive it can happen that an index returns to a previous state (e.g. if a package was first added and then removed with no other changes happening in between). The result is that we have multiple patches which start from the same hash which if we perform clientside merging is no problem although not ideal as we perform needless work. For serverside merging it would not matter, but due to rred previously refusing to merge zero-size patches but dak ignoring failure letting it carry these size-zero patches until they naturally expire we run into a problem as these broken patches won't do and force us to fall back to downloading the entire index. By always starting from the last patch instead of the first with the starter hash we can avoid this problem and behave optimally in clientside merge cases, too. --- apt-pkg/acquire-item.cc | 26 ++++++++------------------ test/integration/test-pdiff-usage | 3 +++ 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/apt-pkg/acquire-item.cc b/apt-pkg/acquire-item.cc index 6dc424426..cc3d2f1ff 100644 --- a/apt-pkg/acquire-item.cc +++ b/apt-pkg/acquire-item.cc @@ -2564,26 +2564,16 @@ bool pkgAcqDiffIndex::ParseDiffIndex(string const &IndexDiffFile) /*{{{*/ } } - - bool foundStart = false; - for (std::vector::iterator cur = available_patches.begin(); - cur != available_patches.end(); ++cur) - { - if (LocalHashes != cur->result_hashes) - continue; - - available_patches.erase(available_patches.begin(), cur); - foundStart = true; - break; - } - - if (foundStart == false || unlikely(available_patches.empty() == true)) { - ErrorText = "Couldn't find the start of the patch series"; - return false; - } + auto const foundStart = std::find_if(available_patches.rbegin(), available_patches.rend(), + [&](auto const &cur) { return LocalHashes == cur.result_hashes; }); + if (foundStart == available_patches.rend() || unlikely(available_patches.empty())) + { + ErrorText = "Couldn't find the start of the patch series"; + return false; + } + available_patches.erase(available_patches.begin(), std::prev(foundStart.base())); - { auto const patch = std::find_if(available_patches.cbegin(), available_patches.cend(), [](auto const &patch) { return not patch.result_hashes.usable() || not patch.patch_hashes.usable() || diff --git a/test/integration/test-pdiff-usage b/test/integration/test-pdiff-usage index 5844619b8..b727aa138 100755 --- a/test/integration/test-pdiff-usage +++ b/test/integration/test-pdiff-usage @@ -100,12 +100,15 @@ testrun() { PATCHINDEX='aptarchive/Packages.diff/Index' echo "SHA256-Current: $(sha256sum "${PKGFILE}-new" | cut -d' ' -f 1) $(stat -c%s "${PKGFILE}-new") SHA256-History: + $(sha256sum "$PKGFILE" | cut -d' ' -f 1) $(stat -c%s "$PKGFILE") 2000-08-18-2013.28 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b 33053002 2010-08-18-2013.28 $(sha256sum "$PKGFILE" | cut -d' ' -f 1) $(stat -c%s "$PKGFILE") $(basename "$PATCHFILE") SHA256-Patches: + $(sha256sum "$PATCHFILE" | cut -d' ' -f 1) $(stat -c%s "$PATCHFILE") 2000-08-18-2013.28 e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 19722 2010-08-18-2013.28 $(sha256sum "$PATCHFILE" | cut -d' ' -f 1) $(stat -c%s "$PATCHFILE") $(basename "$PATCHFILE") SHA256-Download: + $(sha256sum "${PATCHFILE}.gz" | cut -d' ' -f 1) $(stat -c%s "${PATCHFILE}.gz") 2000-08-18-2013.28.gz d2a1b33187ed2d248eeae3b1223ea71791ea35f2138a713ed371332a6421f467 197 2010-08-18-2013.28.gz $(sha256sum "${PATCHFILE}.gz" | cut -d' ' -f 1) $(stat -c%s "${PATCHFILE}.gz") $(basename "${PATCHFILE}.gz")" > "$PATCHINDEX" -- cgit v1.2.3-18-g5258 From 2a81f98b124d8fe551b160df55db1d3bf79a77c1 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Sat, 6 Mar 2021 16:11:34 +0100 Subject: Ensure all index files sent custom tags to the methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mirror method can distribute requests for files based on various metadata bits, but some – the main index files – weren't actually passing those on to the methods as advertised in the manpage. This is hidden both by mirror usually falling back to other sources which will eventually hit the right one and that if the repository does not support by-hash apt will automatically stick to the mirror which was used for the Release file. --- apt-pkg/acquire-item.cc | 11 ++++++----- test/integration/test-method-mirror | 10 ++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/apt-pkg/acquire-item.cc b/apt-pkg/acquire-item.cc index cc3d2f1ff..ab4306aac 100644 --- a/apt-pkg/acquire-item.cc +++ b/apt-pkg/acquire-item.cc @@ -3177,8 +3177,8 @@ void pkgAcqIndex::Init(string const &URI, string const &URIDesc, /* The only header we use is the last-modified header. */ string pkgAcqIndex::Custom600Headers() const { - - string msg = "\nIndex-File: true"; + std::string msg = pkgAcqBaseIndex::Custom600Headers(); + msg.append("\nIndex-File: true"); if (TransactionManager->LastMetaIndexParser == NULL) { @@ -3930,9 +3930,10 @@ void pkgAcqFile::Done(string const &Message,HashStringList const &CalcHashes, /*}}}*/ string pkgAcqFile::Custom600Headers() const /*{{{*/ { - if (IsIndexFile) - return "\nIndex-File: true"; - return ""; + string Header = pkgAcquire::Item::Custom600Headers(); + if (not IsIndexFile) + return Header; + return Header + "\nIndex-File: true"; } /*}}}*/ pkgAcqFile::~pkgAcqFile() {} diff --git a/test/integration/test-method-mirror b/test/integration/test-method-mirror index 56c9a10a0..81a5585fd 100755 --- a/test/integration/test-method-mirror +++ b/test/integration/test-method-mirror @@ -192,6 +192,16 @@ http://localhost:${APTHTTPPORT}/redirectme type:deb testfailure apt update testrundownload 'foo=2' +msgmsg 'Mirrors can be filtered by' 'by-hash type' +echo "http://localhost:${APTHTTPPORT}/failure type:foobar priority:1 +http://localhost:${APTHTTPPORT}/redirectme type:index type:deb +" > aptarchive/mirror.txt +rm -rf rootdir/var/lib/apt/lists +testsuccess apt update -o Acquire::By-Hash=force #-o Debug::pkgAcquire::Worker=1 +cp rootdir/tmp/testsuccess.output forcedbyhash.output +testfailure grep "localhost:${APTHTTPPORT}/failure" forcedbyhash.output +testrundownload 'foo=2' + msgmsg 'The prefix for the mirrorlist is' 'passed on' echo 'Dir::Bin::Methods::foo+mirror+file "mirror"; Dir::Bin::Methods::foo+mirror+http "mirror"; -- cgit v1.2.3-18-g5258