From f66abc53491ab130c2dbea483ba241edf6da2e88 Mon Sep 17 00:00:00 2001 From: frog Date: Wed, 20 Nov 2002 17:55:46 +0000 Subject: [PATCH] * src/gdcm.h and gdcmHeader.cxx gdcm no longer loads all the elements values, but limits itself to values which are below a user specified size (by default 1024 bytes). * python/gdcm.i : elements not loaded (because their size is above the user specified threshold) are exported with their TagKey as key of the python dictionary. This enables the testsuite to make some checks on the unloaded elements (since we have their offset and size). * python/testSuite.py - now tests for the existence and proper value of the pixel data - two new examples added. * Data/gdcm-MR-SIEMENS-16.acr[12] added. --- Frog --- ChangeLog | 13 +++++++ src/gdcm.h | 17 +++++++-- src/gdcmHeader.cxx | 95 +++++++++++++++++++++++++++++----------------- 3 files changed, 86 insertions(+), 39 deletions(-) diff --git a/ChangeLog b/ChangeLog index 15d5e6e9..36525709 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2002-11-20 Eric Boix + * src/gdcm.h and gdcmHeader.cxx gdcm no longer loads all the elements + values, but limits itself to values which are below a user specified + size (by default 1024 bytes). + * python/gdcm.i : elements not loaded (because their size is above the + user specified threshold) are exported with their TagKey as key of the + python dictionary. This enables the testsuite to make some checks + on the unloaded elements (since we have their offset and size). + * python/testSuite.py + - now tests for the existence and proper value of the pixel data + - two new examples added. + * Data/gdcm-MR-SIEMENS-16.acr[12] added. + 2002-11-18 Eric Boix * src/gdcm.h and gdcmHeader.cxx are now "Big Endian transfer syntax" aware. See the strategy comments in gdcmHeader::FindeLength(). 
diff --git a/src/gdcm.h b/src/gdcm.h index c84f9c00..22f3eb9e 100644 --- a/src/gdcm.h +++ b/src/gdcm.h @@ -221,13 +221,15 @@ class GDCM_EXPORT gdcmHeader { //BigEndian, //BadBigEndian}; private: - // All instances share the same value representation dictionary + // All instances share the same Value Representation dictionary static VRHT *dicom_vr; + // Dictionaries of data elements: static gdcmDictSet* Dicts; // Global dictionary container gdcmDict* RefPubDict; // Public Dictionary gdcmDict* RefShaDict; // Shadow Dictionary (optional) - ElValSet PubElVals; // Element Values parsed with Public Dictionary - ElValSet ShaElVals; // Element Values parsed with Shadow Dictionary + // Parsed element values: + ElValSet PubElVals; // parsed with Public Dictionary + ElValSet ShaElVals; // parsed with Shadow Dictionary // In order to inspect/navigate through the file string filename; FILE * fp; @@ -237,7 +239,13 @@ private: // outside of the elements: guint16 grPixel; guint16 numPixel; + // Swap code (little, big, big-bad endian): this code is not fixed + // during parsing. int sw; + // Only the elements whose size is below this bound shall be loaded. + // By default, this upper bound is limited to 1024 (which looks reasonable + // when one considers the definition of the various VR contents). 
+ guint32 MaxSizeLoadElementValue; guint16 ReadInt16(void); guint32 ReadInt32(void); @@ -255,7 +263,8 @@ private: void FixFoundLength(ElValue*, guint32); bool IsAnInteger(ElValue *); bool IsBigEndianTransferSyntax(void); - ElValue * ReadNextElement(void); + void SetMaxSizeLoadElementValue(long); + ElValue * ReadNextElement(void); gdcmDictEntry * IsInDicts(guint32, guint32); size_t GetPixelOffset(void); protected: diff --git a/src/gdcmHeader.cxx b/src/gdcmHeader.cxx index ef794b93..a2bece51 100644 --- a/src/gdcmHeader.cxx +++ b/src/gdcmHeader.cxx @@ -38,6 +38,7 @@ void gdcmHeader::Initialise(void) { } gdcmHeader::gdcmHeader (const char* InFilename) { + SetMaxSizeLoadElementValue(1024); filename = InFilename; Initialise(); fp=fopen(InFilename,"rw"); @@ -56,32 +57,32 @@ void gdcmHeader::InitVRDict (void) { return; } VRHT *vr = new VRHT; - (*vr)["AE"] = "Application Entity"; // 16 car max - (*vr)["AS"] = "Age String"; // 4 car fixe - (*vr)["AT"] = "Attribute Tag"; // 2 unsigned short int - (*vr)["CS"] = "Code String"; // 16 car max - (*vr)["DA"] = "Date"; // 8 car fixe - (*vr)["DS"] = "Decimal String"; // Decimal codé Binaire 16 max - (*vr)["DT"] = "Date Time"; // 26 car max - (*vr)["FL"] = "Floating Point Single"; // 4 octets IEEE 754:1985 - (*vr)["FD"] = "Floating Point Double"; // 8 octets IEEE 754:1985 - (*vr)["IS"] = "Integer String"; // en format externe 12 max - (*vr)["LO"] = "Long String"; // 64 octets max - (*vr)["LT"] = "Long Text"; // 10240 max - (*vr)["OB"] = "Other Byte String"; - (*vr)["OW"] = "Other Word String"; - (*vr)["PN"] = "Person Name"; - (*vr)["SH"] = "Short String"; // 16 car max - (*vr)["SL"] = "Signed Long"; + (*vr)["AE"] = "Application Entity"; // At most 16 bytes + (*vr)["AS"] = "Age String"; // Exactly 4 bytes + (*vr)["AT"] = "Attribute Tag"; // 2 16-bit unsigned short integers + (*vr)["CS"] = "Code String"; // At most 16 bytes + (*vr)["DA"] = "Date"; // Exactly 8 bytes + (*vr)["DS"] = "Decimal String"; // At most 16 bytes + (*vr)["DT"] = 
"Date Time"; // At most 26 bytes + (*vr)["FL"] = "Floating Point Single"; // 32-bit IEEE 754:1985 float + (*vr)["FD"] = "Floating Point Double"; // 64-bit IEEE 754:1985 double + (*vr)["IS"] = "Integer String"; // At most 12 bytes + (*vr)["LO"] = "Long String"; // At most 64 chars + (*vr)["LT"] = "Long Text"; // At most 10240 chars + (*vr)["OB"] = "Other Byte String"; // String of bytes (vr independant) + (*vr)["OW"] = "Other Word String"; // String of 16-bit words (vr dep) + (*vr)["PN"] = "Person Name"; // At most 64 chars + (*vr)["SH"] = "Short String"; // At most 16 chars + (*vr)["SL"] = "Signed Long"; // Exactly 4 bytes (*vr)["SQ"] = "Sequence of Items"; // Not Applicable - (*vr)["SS"] = "Signed Short"; // 2 octets - (*vr)["ST"] = "Short Text"; // 1024 car max - (*vr)["TM"] = "Time"; // 16 car max - (*vr)["UI"] = "Unique Identifier"; // 64 car max - (*vr)["UN"] = "Unknown"; - (*vr)["UT"] = "Unlimited Text"; // 2 puissance 32 -1 car max - (*vr)["UL"] = "Unsigned Long "; // 4 octets fixe - (*vr)["US"] = "Unsigned Short "; // 2 octets fixe + (*vr)["SS"] = "Signed Short"; // Exactly 2 bytes + (*vr)["ST"] = "Short Text"; // At most 1024 chars + (*vr)["TM"] = "Time"; // At most 16 bytes + (*vr)["UI"] = "Unique Identifier"; // At most 64 bytes + (*vr)["UL"] = "Unsigned Long "; // Exactly 4 bytes + (*vr)["UN"] = "Unknown"; // Any length of bytes + (*vr)["US"] = "Unsigned Short "; // Exactly 2 bytes + (*vr)["UT"] = "Unlimited Text"; // At most 2^32 -1 chars dicom_vr = vr; } @@ -225,7 +226,7 @@ void gdcmHeader::CheckSwap() } void gdcmHeader::SwitchSwapToBigEndian(void) { - dbg.Verbose(0, "gdcmHeader::FindLength", "Switching to BigEndian mode."); + dbg.Verbose(1, "gdcmHeader::FindLength", "Switching to BigEndian mode."); if ( sw == 0 ) { sw = 4321; return; @@ -370,11 +371,12 @@ void gdcmHeader::FindLength( ElValue * ElVal) { guint16 length16; if ( (filetype == ExplicitVR) && ! 
ElVal->IsImplicitVr() ) { + if ( (vr=="OB") || (vr=="OW") || (vr=="SQ") || (vr=="UN") ) { - - // The following two bytes are reserved, so we skip them, - // and we proceed on reading the length on 4 bytes. - fseek(fp, 2L,SEEK_CUR); + // The following reserved two bytes (see PS 3.5-2001, section + // 7.1.2 Data element structure with explicit vr p27) must be + // skipped before proceeding on reading the length on 4 bytes. + fseek(fp, 2L, SEEK_CUR); FixFoundLength(ElVal, ReadInt32()); return; } @@ -438,12 +440,15 @@ void gdcmHeader::FindLength( ElValue * ElVal) { return; } - // Either implicit VR or an explicit VR that (at least for this - // element) lied a little bit. Length is on 4 bytes. + // Either implicit VR or a non DICOM conformant (see note below) explicit + // VR that omitted the VR of (at least) this element. Farts happen. + // [Note: according to the part 5, PS 3.5-2001, section 7.1 p25 + // on Data elements "Implicit and Explicit VR Data Elements shall + // not coexist in a Data Set and Data Sets nested within it".] + // Length is on 4 bytes. FixFoundLength(ElVal, ReadInt32()); } - /** * \ingroup gdcmHeader * \brief Swaps back the bytes of 4-byte long integer accordingly to @@ -491,6 +496,16 @@ void gdcmHeader::SkipElementValue(ElValue * ElVal) { (void)fseek(fp, (long)ElVal->GetLength(), SEEK_CUR); } +void gdcmHeader::SetMaxSizeLoadElementValue(long NewSize) { + if (NewSize < 0) + return; + if ((guint32)NewSize >= (guint32)0xffffffff) { + MaxSizeLoadElementValue = 0xffffffff; + return; + } + MaxSizeLoadElementValue = NewSize; +} + /** * \ingroup gdcmHeader * \brief Loads the element if it's size is not to big. 
@@ -526,6 +541,7 @@ void gdcmHeader::LoadElementValue(ElValue * ElVal) { SkipLoad = true; if ( SkipLoad ) { + // FIXME the following skip is not necessary SkipElementValue(ElVal); ElVal->SetLength(0); ElVal->SetValue("gdcm::Skipped"); @@ -537,6 +553,17 @@ void gdcmHeader::LoadElementValue(ElValue * ElVal) { ElVal->SetValue(""); return; } + + // Values bigger than specified are not loaded. + if (length > MaxSizeLoadElementValue) { + ostringstream s; + s << "gdcm::NotLoaded."; + s << " Address:" << (long)ElVal->GetOffset(); + s << " Length:" << ElVal->GetLength(); + //mesg += " Length:" + ElVal->GetLength(); + ElVal->SetValue(s.str()); + return; + } // When an integer is expected, read and convert the following two or // four bytes properly i.e. as an integer as opposed to a string. @@ -564,8 +591,6 @@ void gdcmHeader::LoadElementValue(ElValue * ElVal) { } NewValue[length]= 0; - // FIXME les elements trop long (seuil a fixer a la main) ne devraient - // pas etre charge's !!!! Voir TODO. item_read = fread(NewValue, (size_t)length, (size_t)1, fp); if ( item_read != 1 ) { g_free(NewValue); -- 2.48.1