summaryrefslogtreecommitdiff
path: root/apt-pkg/tagfile.h
diff options
context:
space:
mode:
authorDavid Kalnischkies <david@kalnischkies.de>2014-05-10 11:24:44 +0200
committerDavid Kalnischkies <david@kalnischkies.de>2014-05-10 11:24:44 +0200
commit8710a36a01c0cb1648926792c2ad05185535558e (patch)
tree0f4bc04b87ae5926d9f855f7268ee84802232749 /apt-pkg/tagfile.h
parentffe3c68e494efc1c2c4d748fa9d69e150867e5c2 (diff)
improve pkgTagSection scanning and parsing
Removes the 256 fields limit, deals consistently with spaces littered all over the place and is even a tiny bit faster than before. Even comes with a bunch of new tests to validate these claims.
Diffstat (limited to 'apt-pkg/tagfile.h')
-rw-r--r--apt-pkg/tagfile.h63
1 files changed, 42 insertions, 21 deletions
diff --git a/apt-pkg/tagfile.h b/apt-pkg/tagfile.h
index d1a24ba45..b0cfab759 100644
--- a/apt-pkg/tagfile.h
+++ b/apt-pkg/tagfile.h
@@ -25,6 +25,8 @@
#include <stdio.h>
#include <string>
+#include <vector>
+#include <list>
#ifndef APT_8_CLEANER_HEADERS
#include <apt-pkg/fileutl.h>
@@ -35,23 +37,20 @@ class FileFd;
class pkgTagSection
{
const char *Section;
- // We have a limit of 256 tags per section.
- unsigned int Indexes[256];
- unsigned int AlphaIndexes[0x100];
- unsigned int TagCount;
+ struct TagData {
+ unsigned int StartTag;
+ unsigned int EndTag;
+ unsigned int StartValue;
+ unsigned int NextInBucket;
+
+ TagData(unsigned int const StartTag) : StartTag(StartTag), NextInBucket(0) {}
+ };
+ std::vector<TagData> Tags;
+ unsigned int LookupTable[0x100];
+
// dpointer placeholder (for later in case we need it)
void *d;
- /* This very simple hash function for the last 8 letters gives
- very good performance on the debian package files */
- inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
- {
- unsigned long Res = 0;
- for (; Text != End && *Text != ':' && *Text != 0; Text++)
- Res = ((unsigned long)(*Text) & 0xDF) ^ (Res << 1);
- return Res & 0xFF;
- }
-
protected:
const char *Stop;
@@ -69,17 +68,39 @@ class pkgTagSection
unsigned long Flag) const;
bool static FindFlag(unsigned long &Flags, unsigned long Flag,
const char* Start, const char* Stop);
- bool Scan(const char *Start,unsigned long MaxLength);
+
+ /** \brief searches the boundaries of the current section
+ *
+ * While parameter Start marks the beginning of the section, this method
+ * will search for the first double newline in the data stream which marks
+ * the end of the section. It also does a first pass over the content of
+ * the section parsing it as encountered for processing later on by Find
+ *
+ * @param Start is the beginning of the section
+ * @param MaxLength is the size of valid data in the stream pointed to by Start
+ * @param Restart if enabled internal state will be cleared, otherwise it is
+ * assumed that now more data is available in the stream and the parsing will
+ * start were it encountered insufficent data the last time.
+ *
+ * @return \b true if section end was found, \b false otherwise.
+ * Beware that internal state will be inconsistent if \b false is returned!
+ */
+ APT_MUSTCHECK bool Scan(const char *Start, unsigned long MaxLength, bool const Restart = true);
inline unsigned long size() const {return Stop - Section;};
void Trim();
virtual void TrimRecord(bool BeforeRecord, const char* &End);
-
- inline unsigned int Count() const {return TagCount;};
- bool Exists(const char* const Tag);
-
+
+ /** \brief amount of Tags in the current section
+ *
+ * Note: if a Tag is mentioned repeatly it will be counted multiple
+ * times, but only the last occurance is available via Find methods.
+ */
+ unsigned int Count() const;
+ bool Exists(const char* const Tag) const;
+
inline void Get(const char *&Start,const char *&Stop,unsigned int I) const
- {Start = Section + Indexes[I]; Stop = Section + Indexes[I+1];}
-
+ {Start = Section + Tags[I].StartTag; Stop = Section + Tags[I+1].StartTag;}
+
inline void GetSection(const char *&Start,const char *&Stop) const
{
Start = Section;