diff options
-rw-r--r-- | apt-pkg/contrib/strutl.cc | 124 | ||||
-rw-r--r-- | test/libapt/strutil_test.cc | 70 |
2 files changed, 136 insertions, 58 deletions
diff --git a/apt-pkg/contrib/strutl.cc b/apt-pkg/contrib/strutl.cc index aad358a55..ebf9c9ea6 100644 --- a/apt-pkg/contrib/strutl.cc +++ b/apt-pkg/contrib/strutl.cc @@ -779,86 +779,94 @@ string TimeRFC1123(time_t Date) In particular: this reads blocks from the input until it believes that it's run out of input text. Each block is terminated by a - double newline ('\n' followed by '\n'). As noted below, there is a - bug in this code: it assumes that all the blocks have been read if - it doesn't see additional text in the buffer after the last one is - parsed, which will cause it to lose blocks if the last block - coincides with the end of the buffer. + double newline ('\n' followed by '\n'). */ bool ReadMessages(int Fd, vector<string> &List) { char Buffer[64000]; - char *End = Buffer; // Represents any left-over from the previous iteration of the // parse loop. (i.e., if a message is split across the end // of the buffer, it goes here) string PartialMessage; - - while (1) - { - int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer)); + + do { + int const Res = read(Fd, Buffer, sizeof(Buffer)); if (Res < 0 && errno == EINTR) continue; - - // Process is dead, this is kind of bad.. + + // process we read from has died if (Res == 0) return false; - + // No data - if (Res < 0 && errno == EAGAIN) + if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) return true; if (Res < 0) return false; - - End += Res; - - // Look for the end of the message - for (char *I = Buffer; I + 1 < End; I++) + + // extract the message(s) from the buffer + char const *Start = Buffer; + char const * const End = Buffer + Res; + + char const * NL = (char const *) memchr(Start, '\n', End - Start); + if (NL == NULL) { - if (I[1] != '\n' || - (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0)) - continue; - - // Pull the message out - string Message(Buffer,I-Buffer); - PartialMessage += Message; - - // Fix up the buffer - for (; I < End && (*I == '\n' || *I == '\r'); ++I); - End -= I-Buffer; - memmove(Buffer,I,End-Buffer); - I = Buffer; - - List.push_back(PartialMessage); - PartialMessage.clear(); + // end of buffer: store what we have so far and read new data in + PartialMessage.append(Start, End - Start); + Start = End; } - if (End != Buffer) - { - // If there's text left in the buffer, store it - // in PartialMessage and throw the rest of the buffer - // away. This allows us to handle messages that - // are longer than the static buffer size. - PartialMessage += string(Buffer, End); - End = Buffer; - } else - { - // BUG ALERT: if a message block happens to end at a - // multiple of 64000 characters, this will cause it to - // terminate early, leading to a badly formed block and - // probably crashing the method. However, this is the only - // way we have to find the end of the message block. I have - // an idea of how to fix this, but it will require changes - // to the protocol (essentially to mark the beginning and - // end of the block). - // - // -- dburrows 2008-04-02 - return true; - } + ++NL; + + if (PartialMessage.empty() == false && Start < End) + { + // if we start with a new line, see if the partial message we have ended with one + // so that we properly detect records ending between two read() runs + // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n + // the case \r|\n\r\n is handled by the usual double-newline handling + if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r')) + { + if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r")) + { + PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1); + List.push_back(PartialMessage); + PartialMessage.clear(); + while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL; + Start = NL; + } + } + } + + while (Start < End) { + char const * NL2 = (char const *) memchr(NL, '\n', End - NL); + if (NL2 == NULL) + { + // end of buffer: store what we have so far and read new data in + PartialMessage.append(Start, End - Start); + break; + } + ++NL2; + + // did we find a double newline? + if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r')) + { + PartialMessage.append(Start, NL2 - Start); + PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1); + List.push_back(PartialMessage); + PartialMessage.clear(); + while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2; + Start = NL2; + } + NL = NL2; + } + + // we have read at least one complete message and nothing left + if (PartialMessage.empty() == true) + return true; if (WaitFd(Fd) == false) return false; - } + } while (true); } /*}}}*/ // MonthConv - Converts a month string into a number /*{{{*/ diff --git a/test/libapt/strutil_test.cc b/test/libapt/strutil_test.cc index 494159c87..9bc3c76fd 100644 --- a/test/libapt/strutil_test.cc +++ b/test/libapt/strutil_test.cc @@ -1,10 +1,13 @@ #include <config.h> #include <apt-pkg/strutl.h> +#include <apt-pkg/fileutl.h> #include <string> #include <vector> #include <gtest/gtest.h> +#include "file-helpers.h" + TEST(StrUtilTest,DeEscapeString) { // nothing special @@ -143,3 +146,70 @@ TEST(StrUtilTest,Base64Encode) EXPECT_EQ("Lg==", Base64Encode(".")); EXPECT_EQ("", Base64Encode("")); } +void ReadMessagesTestWithNewLine(char const * const nl, char const * const ab) +{ + SCOPED_TRACE(SubstVar(SubstVar(nl, "\n", "n"), "\r", "r") + " # " + ab); + FileFd fd; + std::string pkgA = "Package: pkgA\n" + "Version: 1\n" + "Size: 100\n" + "Description: aaa\n" + " aaa"; + std::string pkgB = "Package: pkgB\n" + "Version: 1\n" + "Flag: no\n" + "Description: bbb"; + std::string pkgC = "Package: pkgC\n" + "Version: 2\n" + "Flag: yes\n" + "Description:\n" + " ccc"; + + createTemporaryFile("readmessage", fd, NULL, (pkgA + nl + pkgB + nl + pkgC + nl).c_str()); + std::vector<std::string> list; + EXPECT_TRUE(ReadMessages(fd.Fd(), list)); + EXPECT_EQ(3, list.size()); + EXPECT_EQ(pkgA, list[0]); + EXPECT_EQ(pkgB, list[1]); + EXPECT_EQ(pkgC, list[2]); + + size_t const msgsize = 63990; + createTemporaryFile("readmessage", fd, NULL, NULL); + for (size_t j = 0; j < msgsize; ++j) + fd.Write(ab, strlen(ab)); + for (size_t i = 0; i < 21; ++i) + { + std::string msg; + strprintf(msg, "msgsize=%zu i=%zu", msgsize, i); + SCOPED_TRACE(msg); + fd.Seek((msgsize + (i - 1)) * strlen(ab)); + fd.Write(ab, strlen(ab)); + fd.Write(nl, strlen(nl)); + fd.Seek(0); + list.clear(); + EXPECT_TRUE(ReadMessages(fd.Fd(), list)); + EXPECT_EQ(1, list.size()); + EXPECT_EQ((msgsize + i) * strlen(ab), list[0].length()); + EXPECT_EQ(std::string::npos, list[0].find_first_not_of(ab)); + } + + list.clear(); + fd.Write(pkgA.c_str(), pkgA.length()); + fd.Write(nl, strlen(nl)); + fd.Seek(0); + EXPECT_TRUE(ReadMessages(fd.Fd(), list)); + EXPECT_EQ(2, list.size()); + EXPECT_EQ((msgsize + 20) * strlen(ab), list[0].length()); + EXPECT_EQ(std::string::npos, list[0].find_first_not_of(ab)); + EXPECT_EQ(pkgA, list[1]); + + + fd.Close(); +} +TEST(StrUtilTest,ReadMessages) +{ + ReadMessagesTestWithNewLine("\n\n", "a"); + ReadMessagesTestWithNewLine("\r\n\r\n", "a"); + ReadMessagesTestWithNewLine("\n\n", "ab"); + ReadMessagesTestWithNewLine("\r\n\r\n", "ab"); +} |