-#include "gdcmlib.h"
-extern "C" {
-#include "glib.h"
-}
+#include "gdcm.h"
#include <stdio.h>
// For nthos:
#ifdef _MSC_VER
#else
#include <netinet/in.h>
#endif
+#include <cctype> // for isalpha
#include <map>
#include <sstream>
#include "gdcmUtil.h"
RefShaDict = (gdcmDict*)0;
}
-gdcmHeader::gdcmHeader (char* InFilename) {
+gdcmHeader::gdcmHeader (const char* InFilename) {
filename = InFilename;
Initialise();
fp=fopen(InFilename,"rw");
entCur = deb + 128;
if(memcmp(entCur, "DICM", (size_t)4) == 0) {
filetype = TrueDicom;
- dbg.Verbose(0, "gdcmHeader::CheckSwap:", "looks like DICOM Version3");
+ dbg.Verbose(1, "gdcmHeader::CheckSwap:", "looks like DICOM Version3");
} else {
filetype = Unknown;
- dbg.Verbose(0, "gdcmHeader::CheckSwap:", "not a DICOM Version3 file");
+ dbg.Verbose(1, "gdcmHeader::CheckSwap:", "not a DICOM Version3 file");
}
if(filetype == TrueDicom) {
entCur = deb + 136;
if(memcmp(entCur, "UL", (size_t)2) == 0) {
filetype = ExplicitVR;
- dbg.Verbose(0, "gdcmHeader::CheckSwap:",
+ dbg.Verbose(1, "gdcmHeader::CheckSwap:",
"explicit Value Representation");
} else {
filetype = ImplicitVR;
- dbg.Verbose(0, "gdcmHeader::CheckSwap:",
+ dbg.Verbose(1, "gdcmHeader::CheckSwap:",
"not an explicit Value Representation");
}
if (net2host) {
sw = 4321;
- dbg.Verbose(0, "gdcmHeader::CheckSwap:",
+ dbg.Verbose(1, "gdcmHeader::CheckSwap:",
"HostByteOrder != NetworkByteOrder");
} else {
sw = 0;
- dbg.Verbose(0, "gdcmHeader::CheckSwap:",
+ dbg.Verbose(1, "gdcmHeader::CheckSwap:",
"HostByteOrder = NetworkByteOrder");
}
// four (bytes), then determining the proper swap code is straightforward.
entCur = deb + 4;
- s = str2num(entCur, int);
+ s = str2num(entCur, guint32);
switch (s) {
case 0x00040000 :
- sw=3412;
+ sw = 3412;
filetype = ACR;
return;
case 0x04000000 :
- sw=4321;
+ sw = 4321;
filetype = ACR;
return;
case 0x00000400 :
- sw=2143;
+ sw = 2143;
filetype = ACR;
return;
case 0x00000004 :
- sw=0;
+ sw = 0;
filetype = ACR;
return;
default :
// We are out of luck. It is not a DicomV3 nor a 'clean' ACR/NEMA file.
// It is time for despaired wild guesses. So, let's assume this file
- // happens to be 'dirty' ACR/NEMA, i.e. the length of the group it
+ // happens to be 'dirty' ACR/NEMA, i.e. the length of the group is
// not present. Then the only info we have is the net2host one.
//FIXME Si c'est du RAW, ca degagera + tard
return;
}
+/**
+ * \ingroup gdcmHeader
+ * \brief Flips the current swap code to its paired counterpart.
+ *
+ * The mapping is 0 <-> 4321 and 3412 <-> 2143; any other swap code is
+ * left unchanged. Since the mapping is symmetric, calling this method
+ * twice restores the swap code found on entry.
+ */
+void gdcmHeader::SwitchSwapToBigEndian(void) {
+   // The trace carries this method's own name, so that the message points
+   // at the place where the swap code is actually modified.
+   dbg.Verbose(0, "gdcmHeader::SwitchSwapToBigEndian",
+                  "Switching to BigEndian mode.");
+   switch ( sw ) {
+   case 0 :
+      sw = 4321;
+      break;
+   case 4321 :
+      sw = 0;
+      break;
+   case 3412 :
+      sw = 2143;
+      break;
+   case 2143 :
+      sw = 3412;
+      break;
+   default :
+      // Unknown swap code: nothing to flip.
+      break;
+   }
+}
+
/**
* \ingroup gdcmHeader
* \brief recupere la longueur d'un champ DICOM.
void gdcmHeader::FindVR( ElValue *ElVal) {
+ // Reads the two-byte explicit VR that may follow the tag of ElVal and
+ // records it on ElVal. When the bytes found are not a known VR, the file
+ // position is restored and ElVal is flagged as implicit VR.
char VR[3];
+ string vr;
int lgrLue;
long PositionOnEntry = ftell(fp);
-
- if (filetype != ExplicitVR)
- return;
-
- lgrLue=fread (&VR, (size_t)2,(size_t)1, fp);
- VR[2]=0;
-
// Warning: we believe this is explicit VR (Value Representation) because
// we used a heuristic that found "UL" in the first tag. Alas this
// doesn't guarantee that all the tags will be in explicit VR. In some
// within an explicit VR file. Hence we make sure the present tag
// is in explicit VR and try to fix things if it happens not to be
// the case.
+ bool RealExplicit = true;
+
+ if (filetype != ExplicitVR)
+ return;
- // FIXME There should be only one occurence returned. Avoid the
- // first extraction by calling proper method.
- VRAtr FoundVR = dicom_vr->find(string(VR))->first;
- if ( ! FoundVR.empty()) {
- ElVal->SetVR(FoundVR);
+ // On a short read VR holds no usable bytes: do not look at them and
+ // handle the tag as a falsely explicit one.
+ lgrLue=fread (&VR, (size_t)2,(size_t)1, fp);
+ VR[2]=0;
+ if ( lgrLue != 1 )
+ RealExplicit = false;
+ else
+ vr = string(VR);
+
+ // Assume we are reading a falsely explicit VR file i.e. we reached
+ // a tag where we expect to read a VR but in fact we read the
+ // first two bytes of the length. Then we would interrogate (through find)
+ // the dicom_vr dictionary with oddities like "\004\0" which crashes
+ // both GCC and VC++ implementations of the STL map. Hence when the
+ // expected VR read happens to be non-alphabetic characters we consider
+ // we hit a falsely explicit VR tag.
+ // The bytes are converted to unsigned char before the test: handing a
+ // negative plain char to isalpha is undefined behaviour.
+ if ( RealExplicit
+   && (!isalpha(static_cast<unsigned char>(VR[0])))
+   && (!isalpha(static_cast<unsigned char>(VR[1]))) )
+ RealExplicit = false;
+
+ // CLEANME searching the dicom_vr at each occurrence is expensive.
+ // Postpone this test to an optional integrity check at the end
+ // of parsing or only in debug mode.
+ if ( RealExplicit && !dicom_vr->count(vr) )
+ RealExplicit = false;
+
+ if ( RealExplicit ) {
+ if ( ElVal->IsVrUnknown() )
+ ElVal->SetVR(vr);
return;
}
// We thought this was explicit VR, but we end up with an
// implicit VR tag. Let's backtrack.
- ElVal->SetVR("Implicit");
+ dbg.Verbose(1, "gdcmHeader::FindVR:", "Falsely explicit vr file");
fseek(fp, PositionOnEntry, SEEK_SET);
+ // When this element is known in the dictionary we shall use, e.g. for
+ // the semantics (see the usage of IsAnInteger), the vr proposed by the
+ // dictionary entry. Still we have to flag the element as implicit since
+ // we now know our assumption on explicitness is not fulfilled.
+ if ( ElVal->IsVrUnknown() )
+ ElVal->SetVR("Implicit");
+ ElVal->SetImplicitVr();
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief Tells whether the Transfer Syntax element (0x0002, 0x0010) was
+ *        already encountered and whether its value is the big endian
+ *        one, "1.2.840.10008.1.2.2".
+ *
+ * The element value is loaded with LoadElementValueSafe before it is
+ * compared.
+ *
+ * @return True when big endian found. False in all other cases.
+ */
+bool gdcmHeader::IsBigEndianTransferSyntax(void) {
+   ElValue* TransferSyntax = PubElVals.GetElement(0x0002, 0x0010);
+   if ( TransferSyntax ) {
+      LoadElementValueSafe(TransferSyntax);
+      string SyntaxUid = TransferSyntax->GetValue();
+      return SyntaxUid == "1.2.840.10008.1.2.2";
+   }
+   // No Transfer Syntax element among the elements parsed so far.
+   return false;
+}
+
+void gdcmHeader::FixFoundLength(ElValue * ElVal, guint32 FoudLength) {
+ // Records on ElVal the length that was read for it, after one last
+ // heuristic fix: a length read as 0xffffffff is recorded as 0.
+ const guint32 AllBitsSet = 0xffffffff;
+ ElVal->SetLength( (FoudLength == AllBitsSet) ? (guint32)0 : FoudLength );
}
void gdcmHeader::FindLength( ElValue * ElVal) {
- guint32 length32;
+ guint16 element = ElVal->GetElement();
+ string vr = ElVal->GetVR();
guint16 length16;
- string vr = ElVal->GetVR();
-
- if ( (filetype == ExplicitVR) && (vr != "Implicit") ) {
- if ( ( vr == "OB" ) || ( vr == "OW" )
- || ( vr == "SQ" ) || ( vr == "UN" ) ) {
+ if ( (filetype == ExplicitVR) && ! ElVal->IsImplicitVr() ) {
+ if ( (vr=="OB") || (vr=="OW") || (vr=="SQ") || (vr=="UN") ) {
// The following two bytes are reserved, so we skip them,
// and we proceed on reading the length on 4 bytes.
fseek(fp, 2L,SEEK_CUR);
- length32 = ReadInt32();
-
- } else {
- // Length is encoded on 2 bytes.
- length16 = ReadInt16();
-
- if ( length16 == 0xffff) {
- length32 = 0;
- } else {
- length32 = length16;
+ FixFoundLength(ElVal, ReadInt32());
+ return;
+ }
+
+ // Length is encoded on 2 bytes.
+ length16 = ReadInt16();
+
+ // We can tell the current file is encoded in big endian (like
+ // Data/US-RGB-8-epicard) when we find the "Transfer Syntax" tag
+ // and its value is the one of the encoding of a big endian file.
+ // In order to deal with such big endian encoded files, we have
+ // (at least) two strategies:
+ // * when we load the "Transfer Syntax" tag with value of big endian
+ // encoding, we raise the proper flags. Then we wait for the end
+ // of the META group (0x0002) among which is "Transfer Syntax",
+ // before switching the swap code to big endian. We have to postpone
+ // the switching of the swap code since the META group is fully encoded
+ // in little endian, and big endian coding only starts at the next
+ // group. The corresponding code can be hard to analyse and adds
+ // many additional unnecessary tests for regular tags.
+ // * the second strategy consists in waiting for trouble, that shall appear
+ // when we find the first group with big endian encoding. This is
+ // easy to detect since the length of a "Group Length" tag (the
+ // ones with zero as element number) has to be of 4 (0x0004). When we
+ // encounter 1024 (0x0400) chances are the encoding changed and we
+ // found a group with big endian encoding.
+ // We shall use this second strategy. In order to make sure that we
+ // can interpret the presence of an apparently big endian encoded
+ // length of a "Group Length" without committing a big mistake, we
+ // add an additional check: we look in the already parsed elements
+ // for the presence of a "Transfer Syntax" whose value has to be "big
+ // endian encoding". When this is the case, chances are we got our
+ // hands on a big endian encoded file: we switch the swap code to
+ // big endian and proceed...
+ if ( (element == 0) && (length16 == 1024) ) {
+ if ( ! IsBigEndianTransferSyntax() )
+ throw Error::FileReadError(fp, "gdcmHeader::FindLength");
+ length16 = 4;
+ SwitchSwapToBigEndian();
+ // Restore the improperly loaded values i.e. the group, the element
+ // and the dictionary entry depending on them.
+ guint16 CorrectGroup = SwapShort(ElVal->GetGroup());
+ guint16 CorrectElem = SwapShort(ElVal->GetElement());
+ gdcmDictEntry * NewTag = IsInDicts(CorrectGroup, CorrectElem);
+ if (!NewTag) {
+ // This correct tag is not in the dictionary. Create a new one.
+ NewTag = new gdcmDictEntry(CorrectGroup, CorrectElem);
}
+ // FIXME this can create a memory leak on the old entry, that would be
+ // left unreferenced.
+ ElVal->SetDictEntry(NewTag);
}
- } else {
- // Either implicit VR or an explicit VR that (at least for this
- // element) lied a little bit. Length is on 4 bytes.
- length32 = ReadInt32();
+
+ // Heuristic: well some files are really ill-formed. A 2-byte length of 0xffff is recorded as 0.
+ if ( length16 == 0xffff) {
+ length16 = 0;
+ dbg.Verbose(0, "gdcmHeader::FindLength",
+ "Erroneous element length fixed.");
+ }
+ FixFoundLength(ElVal, (guint32)length16);
+ return;
}
-
- // Traitement des curiosites sur la longueur
- if ( length32 == 0xffffffff)
- length32=0;
-
- ElVal->SetLength(length32);
+
+ // Either implicit VR or an explicit VR that (at least for this
+ // element) lied a little bit. Length is on 4 bytes.
+ FixFoundLength(ElVal, ReadInt32());
}
/**
* \ingroup gdcmHeader
- * \brief remet les octets dans un ordre compatible avec celui du processeur
-
- * @return longueur retenue pour le champ
+ * \brief Swaps back the bytes of a 4-byte long integer according to
+ * processor order.
+ *
+ * @return The properly swapped 32-bit integer.
*/
guint32 gdcmHeader::SwapLong(guint32 a) {
// FIXME: il pourrait y avoir un pb pour les entiers negatifs ...
/**
* \ingroup gdcmHeader
- * \brief remet les octets dans un ordre compatible avec celui du processeur
-
- * @return longueur retenue pour le champ
+ * \brief Swaps the bytes so they agree with the processor order.
+ * @return The properly swapped 16-bit integer.
*/
guint16 gdcmHeader::SwapShort(guint16 a) {
if ( (sw==4321) || (sw==2143) )
(void)fseek(fp, (long)ElVal->GetLength(), SEEK_CUR);
}
+/**
+ * \ingroup gdcmHeader
+ * \brief Loads the element if its size is not too big.
+ * @param ElVal Element whose value shall be loaded.
+ * @param MaxSize Size threshold above which the element value is not
+ * loaded in memory. The element value is always loaded
+ * when MaxSize is equal to UINT32_MAX.
+ * NOTE(review): the definition that follows takes only
+ * ElVal; confirm MaxSize exists before keeping this entry.
+ * @return
+ */
void gdcmHeader::LoadElementValue(ElValue * ElVal) {
size_t item_read;
guint16 group = ElVal->GetGroup();
guint16 elem = ElVal->GetElement();
string vr = ElVal->GetVR();
guint32 length = ElVal->GetLength();
+ bool SkipLoad = false;
+
fseek(fp, (long)ElVal->GetOffset(), SEEK_SET);
// Sequences not treated yet !
- if( vr == "SQ" ) {
+ if( vr == "SQ" )
+ SkipLoad = true;
+
+ // Heuristic : a sequence "contains" a set of tags (called items). It looks
+ // like the last tag of a sequence (the one that terminates the sequence)
+ // has a group of 0xfffe (with a dummy length).
+ if( group == 0xfffe )
+ SkipLoad = true;
+
+ // The group length doesn't represent data to be loaded in memory, since
+ // each element of the group shall be loaded individualy.
+ if( elem == 0 )
+ SkipLoad = true;
+
+ if ( SkipLoad ) {
SkipElementValue(ElVal);
ElVal->SetLength(0);
+ ElVal->SetValue("gdcm::Skipped");
return;
}
- // A sequence "contains" a set of tags (called items). It looks like
- // the last tag of a sequence (the one that terminates the sequence)
- // has a group of 0xfffe (with a dummy length).
- if( group == 0xfffe) {
- SkipElementValue(ElVal);
- ElVal->SetLength(0);
+
+ // When the length is zero things are easy:
+ if ( length == 0 ) {
+ ElVal->SetValue("");
return;
}
- if ( IsAnInteger(group, elem, vr, length) ) {
+ // When an integer is expected, read and convert the following two or
+ // four bytes properly i.e. as an integer as opposed to a string.
+ if ( IsAnInteger(ElVal) ) {
guint32 NewInt;
if( length == 2 ) {
NewInt = ReadInt16();
ElVal->SetValue(NewValue);
}
+/**
+ * \ingroup gdcmHeader
+ * \brief Loads the value of an element and then puts the underlying
+ *        file position indicator back where it stood on entry, as
+ *        opposed to LoadElementValue, which modifies it.
+ * @param ElVal Element whose value shall be loaded.
+ */
+void gdcmHeader::LoadElementValueSafe(ElValue * ElVal) {
+   // Remember where the caller was reading before the load moves around.
+   const long SavedPosition = ftell(fp);
+   LoadElementValue(ElVal);
+   fseek(fp, SavedPosition, SEEK_SET);
+}
+
guint16 gdcmHeader::ReadInt16(void) {
guint16 g;
// Find out if the tag we encountered is in the dictionaries:
gdcmDictEntry * NewTag = IsInDicts(g, n);
if (!NewTag)
- NewTag = new gdcmDictEntry(g, n, "Unknown", "Unknown", "Unkown");
+ NewTag = new gdcmDictEntry(g, n);
NewElVal = new ElValue(NewTag);
if (!NewElVal) {
}
FindVR(NewElVal);
- FindLength(NewElVal);
+ try { FindLength(NewElVal); }
+ catch ( Error::FileReadError ) { // Call it quits
+ return (ElValue *)0;
+ }
NewElVal->SetOffset(ftell(fp));
return NewElVal;
}
-bool gdcmHeader::IsAnInteger(guint16 group, guint16 element,
- string vr, guint32 length ) {
- // When we have some semantics on the element we just read, and we
- // a priori now we are dealing with an integer, then we can swap it's
- // element value properly.
+bool gdcmHeader::IsAnInteger(ElValue * ElVal) {
+ guint16 group = ElVal->GetGroup();
+ guint16 element = ElVal->GetElement();
+ string vr = ElVal->GetVR();
+ guint32 length = ElVal->GetLength();
+
+ // When we have some semantics on the element we just read, and if we
+ // a priori know we are dealing with an integer, then we shall be
+ // able to swap it's element value properly.
if ( element == 0 ) { // This is the group length of the group
- if (length != 4)
- dbg.Error("gdcmHeader::ShouldBeSwaped", "should be four");
- return true;
+ if (length == 4)
+ return true;
+ else
+ dbg.Error("gdcmHeader::IsAnInteger",
+ "Erroneous Group Length element length.");
}
if ( group % 2 != 0 )