+ // Length is encoded on 2 bytes.
+ length16 = ReadInt16();
+
+ // We can tell the current file is encoded in big endian (like
+ // Data/US-RGB-8-epicard) when we find the "Transfer Syntax" tag
+ // and it's value is the one of the encoding of a big endian file.
+ // In order to deal with such big endian encoded files, we have
+ // (at least) two strategies:
+ // * when we load the "Transfer Syntax" tag with value of big endian
+ // encoding, we raise the proper flags. Then we wait for the end
+ // of the META group (0x0002) among which is "Transfer Syntax",
+ // before switching the swap code to big endian. We have to postpone
+ // the switching of the swap code since the META group is fully encoded
+ // in little endian, and big endian coding only starts at the next
+ // group. The corresponding code can be hard to analyse and adds
+ // many additional unnecessary tests for regular tags.
+ // * the second strategy consists in waiting for trouble, that shall
+ // appear when we find the first group with big endian encoding. This
+ // is easy to detect since the length of a "Group Length" tag (the
+ // ones with zero as element number) has to be of 4 (0x0004). When we
+ // encounter 1024 (0x0400) chances are the encoding changed and we
+ // found a group with big endian encoding.
+ // We shall use this second strategy. In order to make sure that we
+ // can interpret the presence of an apparently big endian encoded
+ // length of a "Group Length" without committing a big mistake, we
+ // add an additional check: we look in the already parsed elements
+ // for the presence of a "Transfer Syntax" whose value has to be "big
+ // endian encoding". When this is the case, chances are we have got our
+ // hands on a big endian encoded file: we switch the swap code to
+ // big endian and proceed...
+ if ( (element == 0x0000) && (length16 == 0x0400) ) {
+ if ( ! IsExplicitVRBigEndianTransferSyntax() ) {
+ dbg.Verbose(0, "gdcmHeader::FindLength", "not explicit VR");
+ errno = 1;
+ return;
+ }
+ length16 = 4;
+ SwitchSwapToBigEndian();
+ // Restore the unproperly loaded values i.e. the group, the element
+ // and the dictionary entry depending on them.
+ guint16 CorrectGroup = SwapShort(ElVal->GetGroup());
+ guint16 CorrectElem = SwapShort(ElVal->GetElement());
+ gdcmDictEntry * NewTag = GetDictEntryByNumber(CorrectGroup,
+ CorrectElem);
+ if (!NewTag) {
+ // This correct tag is not in the dictionary. Create a new one.
+ NewTag = new gdcmDictEntry(CorrectGroup, CorrectElem);
+ }
+ // FIXME this can create a memory leak on the old entry, which may be
+ // left unreferenced.
+ ElVal->SetDictEntry(NewTag);
+ }
+
+ // Heuristic: well some files are really ill-formed.
+ if ( length16 == 0xffff) {
+ length16 = 0;
+ //dbg.Verbose(0, "gdcmHeader::FindLength",
+ // "Erroneous element length fixed.");
+ // Actually, length= 0xffff means that we deal with
+ // Unknown Sequence Length
+ }
+
+ FixHeaderEntryFoundLength(ElVal, (guint32)length16);
+ return;
+ }
+
+ // Either implicit VR or a non DICOM conformant (see note below) explicit
+ // VR that omitted the VR of (at least) this element. Farts happen.
+ // [Note: according to the part 5, PS 3.5-2001, section 7.1 p25
+ // on Data elements "Implicit and Explicit VR Data Elements shall
+ // not coexist in a Data Set and Data Sets nested within it".]
+ // Length is on 4 bytes.
+ FixHeaderEntryFoundLength(ElVal, ReadInt32());
+ return;
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Find the Value Representation of the current Dicom Element.
+ *          Does nothing unless the file was detected as explicit VR.
+ *          Reads the two VR bytes at the current file position. When they
+ *          form a VR known to the dicom_vr dictionary, the entry is updated
+ *          accordingly. Otherwise the file position is restored to where it
+ *          was on entry and the entry is flagged as implicit VR.
+ * @param   ElVal Header entry whose VR has to be determined.
+ */
+void gdcmHeader::FindHeaderEntryVR( gdcmHeaderEntry *ElVal) {
+   if (filetype != ExplicitVR)
+      return;
+
+   // Zero-initialised so that a failed read never exposes garbage.
+   char VR[3] = { 0, 0, 0 };
+   char msg[100]; // for sprintf. Sorry
+
+   long PositionOnEntry = ftell(fp);
+   // Warning: we believe this is explicit VR (Value Representation) because
+   // we used a heuristic that found "UL" in the first tag. Alas this
+   // doesn't guarantee that all the tags will be in explicit VR. In some
+   // cases (see e-film filtered files) one finds implicit VR tags mixed
+   // within an explicit VR file. Hence we make sure the present tag
+   // is in explicit VR and try to fix things if it happens not to be
+   // the case.
+   bool RealExplicit = true;
+
+   // A short read (end of file, I/O error) cannot yield a valid VR.
+   size_t lgrLue = fread (VR, (size_t)2, (size_t)1, fp);
+   if ( lgrLue != 1 )
+      RealExplicit = false;
+   VR[2] = 0;
+   std::string vr(VR);
+
+   // Assume we are reading a falsely explicit VR file i.e. we reached
+   // a tag where we expect reading a VR but in fact we read the
+   // first two bytes of the length. Then we would interrogate (through find)
+   // the dicom_vr dictionary with oddities like "\004\0" which crashes
+   // both GCC and VC++ implementations of the STL map. Hence when the
+   // expected VR read happens to contain a non-alphabetic character we
+   // consider we hit a falsely explicit VR tag.
+   // Both characters have to be letters: a single bad one is enough to
+   // reject the VR. The cast to unsigned char is required since isalpha()
+   // is undefined for negative values (bytes above 0x7f with signed char).
+   if ( !isalpha((unsigned char)VR[0]) || !isalpha((unsigned char)VR[1]) )
+      RealExplicit = false;
+
+   // CLEANME searching the dicom_vr at each occurence is expensive.
+   // PostPone this test in an optional integrity check at the end
+   // of parsing or only in debug mode.
+   if ( RealExplicit && !dicom_vr->Count(vr) )
+      RealExplicit = false;
+
+   if ( RealExplicit ) {
+      if ( ElVal->IsVRUnknown() ) {
+         // When not a dictionary entry, we can safely overwrite the VR.
+         ElVal->SetVR(vr);
+         return;
+      }
+      if ( ElVal->GetVR() == vr ) {
+         // The VR we just read and the dictionary agree. Nothing to do.
+         return;
+      }
+      // The VR present in the file and the dictionary disagree. We assume
+      // the file writer knew best and use the VR of the file. Since it would
+      // be unwise to overwrite the VR of a dictionary (since it would
+      // compromise its next user), we need to clone the actual DictEntry
+      // and change the VR for the read one.
+      // NOTE(review): ownership of the newly allocated entry is not visible
+      // from here - confirm who deletes it.
+      gdcmDictEntry* NewTag = new gdcmDictEntry(ElVal->GetGroup(),
+                                                ElVal->GetElement(),
+                                                vr,
+                                                "FIXME",
+                                                ElVal->GetName());
+      ElVal->SetDictEntry(NewTag);
+      return;
+   }
+
+   // We thought this was explicit VR, but we end up with an
+   // implicit VR tag. Let's backtrack.
+
+   sprintf(msg,"Falsely explicit vr file (%04x,%04x)\n",
+           ElVal->GetGroup(),ElVal->GetElement());
+   dbg.Verbose(1, "gdcmHeader::FindVR: ",msg);
+
+   fseek(fp, PositionOnEntry, SEEK_SET);
+   // When this element is known in the dictionary we shall use, e.g. for
+   // the semantics (see the usage of IsAnInteger), the VR proposed by the
+   // dictionary entry. Still we have to flag the element as implicit since
+   // we know now our assumption on explicitness is not fulfilled.
+   if ( ElVal->IsVRUnknown() )
+      ElVal->SetVR("Implicit");
+   ElVal->SetImplicitVr();
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Skips the value of a header entry by advancing the file
+ *          position by the length reported by the entry.
+ * @param   entry Header entry whose value is skipped.
+ */
+void gdcmHeader::SkipHeaderEntry(gdcmHeaderEntry * entry) {
+   const guint32 valueLength = entry->GetLength();
+   SkipBytes(valueLength);
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   When the length of an element value is obviously wrong (because
+ *          the parser went Jabberwocky) one can hope improving things by
+ *          applying this heuristic.
+ *          The length found in the file is first stored as the read length
+ *          of the entry; the (possibly corrected) length is stored as its
+ *          usable length. At most one of the corrections below is applied.
+ * @param   ElVal       Header entry whose lengths are set.
+ * @param   FoundLength Length as found in the file.
+ */
+void gdcmHeader::FixHeaderEntryFoundLength(gdcmHeaderEntry * ElVal, guint32 FoundLength) {
+
+   // Keep track of what was read; overwritten below only when a known
+   // bug gets patched.
+   ElVal->SetReadLength(FoundLength);
+
+   guint32 UsableLength = FoundLength;
+
+   if ( FoundLength == 0xffffffff ) {
+      UsableLength = 0;
+   }
+   else if ( FoundLength == 13 ) {
+      // Sorry for the patch!
+      // XMedCom did the trick to read some nasty GE images ...
+      // The exemption for (0008,0070) and (0008,0080) will be removed when
+      // there is no more images on Creatis HDs with a 13 length for
+      // Manufacturer...
+      const bool Exempted =
+            (ElVal->GetGroup() == 0x0008) &&
+            ( (ElVal->GetElement() == 0x0070) || (ElVal->GetElement() == 0x0080) );
+      if ( !Exempted ) {
+         UsableLength = 10;
+         ElVal->SetReadLength(10); // a bug is to be fixed
+      }
+   }
+   else if ( (ElVal->GetGroup() == 0x0009) &&
+             ( (ElVal->GetElement() == 0x1113) || (ElVal->GetElement() == 0x1114) ) ) {
+      // to fix some garbage 'Leonardo' Siemens images
+      // May be commented out to avoid overhead
+      UsableLength = 4;
+      ElVal->SetReadLength(4); // a bug is to be fixed
+   }
+   else if ( ElVal->GetVR() == "SQ" ) {
+      // to try to 'go inside' SeQuences (with length), and not to skip them:
+      // only if the user does want to !
+      if ( enableSequences )
+         UsableLength = 0;
+   }
+   else if ( ElVal->GetGroup() == 0xfffe ) {
+      // a SeQuence Element is beginning
+      // Let's forget its length
+      // (we want to 'go inside')
+
+      // Pb : *normally* fffe|e000 is just a marker, its length *should be* zero
+      // in gdcm-MR-PHILIPS-16-Multi-Seq.dcm we find lengths as big as 28800
+      // if we set the length to zero IsHeaderEntryAnInteger() breaks...
+      // if we don't, we lost 28800 characters from the Header :-(
+
+      // sometimes, length seems to be wrong
+      // some more clever checking to be done !
+      // I give up!
+      // only gdcm-MR-PHILIPS-16-Multi-Seq.dcm
+      // causes troubles :-(
+      UsableLength = 0;
+   }
+
+   ElVal->SetUsableLength(UsableLength);
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Apply some heuristics to predict whether the considered
+ *          element value contains/represents an integer or not.
+ *          A group length element (element number zero) of length 4 is an
+ *          integer; any other length for it is reported through dbg.Error.
+ *          Otherwise the decision relies on the VR being UL, US, SL or SS.
+ * @param   ElVal The element value on which to apply the predicate.
+ * @return  The result of the heuristical predicate.
+ */
+bool gdcmHeader::IsHeaderEntryAnInteger(gdcmHeaderEntry * ElVal) {
+   const guint16     elemNumber  = ElVal->GetElement();
+   const guint16     groupNumber = ElVal->GetGroup();
+   const std::string valueRep    = ElVal->GetVR();
+   const guint32     valueLength = ElVal->GetLength();
+
+   // When we have some semantics on the element we just read, and if we
+   // a priori know we are dealing with an integer, then we shall be
+   // able to swap its element value properly.
+   if ( elemNumber == 0 ) {  // This is the group length of the group
+      if ( valueLength == 4 )
+         return true;
+
+      std::ostringstream report;
+      report << "Erroneous Group Length element length on :"
+             << std::hex << groupNumber << " , " << elemNumber;
+      dbg.Error("gdcmHeader::IsAnInteger",
+                report.str().c_str());
+   }
+
+   return (valueRep == "UL") || (valueRep == "US") ||
+          (valueRep == "SL") || (valueRep == "SS");
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Computes the total length, in bytes, of a run of items starting
+ *          at the current file position and ending with a sequence
+ *          delimiter (see PS 3.5-2001, section A.4 p. 49 on encapsulation
+ *          of encoded pixel data).
+ *          On success the file position is restored to where it was on
+ *          entry. On failure errno is set to 1 and the file position is
+ *          left where the problem was met.
+ * @return  The total length (tags, length fields and item values), or 0
+ *          when a read failed or an unexpected tag was found.
+ */
+guint32 gdcmHeader::FindHeaderEntryLengthOB(void) {
+   const long StartPosition = ftell(fp);
+   guint32 LengthSoFar = 0;
+   bool DelimiterReached = false;
+
+   do {
+      const guint16 grp  = ReadInt16();
+      const guint16 elem = ReadInt16();
+      if (errno == 1)
+         return 0;
+      LengthSoFar += 4;  // We even have to decount the group and element
+
+      const bool GroupIsAcceptable = (grp == 0xfffe) || (grp == 0xb00c); /* for bogus header */
+      if ( !GroupIsAcceptable ) {
+         char msg[100]; // for sprintf. Sorry
+         sprintf(msg,"wrong group (%04x) for an item sequence (%04x,%04x)\n",grp, grp,elem);
+         dbg.Verbose(1, "gdcmHeader::FindLengthOB: ",msg);
+         errno = 1;
+         return 0;
+      }
+
+      if ( (elem == 0xe0dd) || ( (grp == 0xb00c) && (elem == 0x0eb6) ) ) { /* for bogus header */
+         DelimiterReached = true;
+      } else if ( elem != 0xe000 ) {
+         char msg[100]; // for sprintf. Sorry
+         sprintf(msg,"wrong element (%04x) for an item sequence (%04x,%04x)\n",
+                 elem, grp,elem);
+         dbg.Verbose(1, "gdcmHeader::FindLengthOB: ",msg);
+         errno = 1;
+         return 0;
+      }
+
+      // The length field itself accounts for 4 more bytes, on top of the
+      // item value it announces.
+      const guint32 ItemLength = ReadInt32();
+      LengthSoFar += ItemLength + 4;
+      SkipBytes(ItemLength);
+   } while ( !DelimiterReached );
+
+   fseek(fp, StartPosition, SEEK_SET);
+   return LengthSoFar;
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Reads two bytes at the current file position and returns them
+ *          as a 16 bits integer, once processed by SwapShort.
+ *          errno acts as the status flag: it is set to 0 on success and
+ *          to 1 when the read failed.
+ * @return  The swapped 16 bits value, or 0 when the read failed.
+ */
+guint16 gdcmHeader::ReadInt16(void) {
+   guint16 raw;
+   const size_t nbItemsRead = fread (&raw, (size_t)2,(size_t)1, fp);
+
+   if ( nbItemsRead == 1 ) {
+      errno = 0;
+      return SwapShort(raw);
+   }
+
+   // The read failed: only a genuine file error is reported.
+   if ( ferror(fp) )
+      dbg.Verbose(0, "gdcmHeader::ReadInt16", " File Error");
+   errno = 1;
+   return 0;
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Reads four bytes at the current file position and returns them
+ *          as a 32 bits integer, once processed by SwapLong.
+ *          errno acts as the status flag: it is set to 0 on success and
+ *          to 1 when the read failed.
+ * @return  The swapped 32 bits value, or 0 when the read failed.
+ */
+guint32 gdcmHeader::ReadInt32(void) {
+   guint32 raw;
+   const size_t nbItemsRead = fread (&raw, (size_t)4,(size_t)1, fp);
+
+   if ( nbItemsRead == 1 ) {
+      errno = 0;
+      return SwapLong(raw);
+   }
+
+   // The read failed: only a genuine file error is reported.
+   if ( ferror(fp) )
+      dbg.Verbose(0, "gdcmHeader::ReadInt32", " File Error");
+   errno = 1;
+   return 0;
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Moves the file position forward by NBytes bytes, relative to
+ *          the current position. Failures are reported through
+ *          dbg.Verbose; nothing is returned to the caller.
+ * @param   NBytes Number of bytes to skip.
+ */
+void gdcmHeader::SkipBytes(guint32 NBytes) {
+   long offset = (long)NBytes;
+   if ( offset < 0 ) {
+      // NBytes does not fit in a long (32 bits long). Seeking with the
+      // wrapped, negative offset would move the file position backwards.
+      // Go to the end of the file instead, so that the next read fails.
+      dbg.Verbose(0, "gdcmHeader::SkipBytes",
+                     "length too large to be skipped");
+      (void)fseek(fp, 0L, SEEK_END);
+      return;
+   }
+   if ( fseek(fp, offset, SEEK_CUR) != 0 )
+      dbg.Verbose(0, "gdcmHeader::SkipBytes", "fseek failed");
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Fetches the VR table, the TS table and the dictionary set from
+ *          gdcmGlobal, selects the default public dictionary as reference
+ *          public dictionary and resets the reference shadow dictionary.
+ */
+void gdcmHeader::Initialise(void) {
+   // No shadow dictionary is selected at this stage.
+   RefShaDict = static_cast<gdcmDict*>(0);
+
+   dicom_vr = gdcmGlobal::GetVR();
+   dicom_ts = gdcmGlobal::GetTS();
+
+   Dicts      = gdcmGlobal::GetDicts();
+   RefPubDict = Dicts->GetDefaultPubDict();
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Discover what the swap code is (among little endian, big endian,
+ *          bad little endian, bad big endian).
+ *          Reads the first HEADER_LENGTH_TO_READ bytes of the file, sets
+ *          the members sw and filetype accordingly, and leaves the file
+ *          position on the first tag: offset 132 for a DICOM V3 file,
+ *          offset 0 otherwise.
+ */
+void gdcmHeader::CheckSwap()
+{
+   // Fourth semantics:
+   //
+   // ---> Warning : This fourth field is NOT part
+   //                of the 'official' Dicom Dictionnary
+   //                and should NOT be used.
+   //                (Not defined for all the groups
+   //                 may be removed in a future release)
+   //
+   // CMD      Command
+   // META     Meta Information
+   // DIR      Directory
+   // ID
+   // PAT      Patient
+   // ACQ      Acquisition
+   // REL      Related
+   // IMG      Image
+   // SDY      Study
+   // VIS      Visit
+   // WAV      Waveform
+   // PRC
+   // DEV      Device
+   // NMI      Nuclear Medicine
+   // MED
+   // BFS      Basic Film Session
+   // BFB      Basic Film Box
+   // BIB      Basic Image Box
+   // BAB
+   // IOB
+   // PJ
+   // PRINTER
+   // RT       Radio Therapy
+   // DVH
+   // SSET
+   // RES      Results
+   // CRV      Curve
+   // OLY      Overlays
+   // PXL      Pixels
+   // DL       Delimiters
+   //
+
+   // The only guaranteed way of finding the swap code is to find a
+   // group tag since we know its length has to be of four bytes i.e.
+   // 0x00000004. Finding the swap code is then straightforward. Trouble
+   // occurs when we can't find such group...
+   guint32 s;
+   guint32 x=4;  // x : for ntohs
+   bool net2host; // true when HostByteOrder is the same as NetworkByteOrder
+
+   char * entCur;
+   // NOTE(review): offsets up to 137 are read below, hence
+   // HEADER_LENGTH_TO_READ is assumed to be at least 138 - confirm.
+   char deb[HEADER_LENGTH_TO_READ];
+
+   // First, compare HostByteOrder and NetworkByteOrder in order to
+   // determine if we shall need to swap bytes (i.e. the Endian type).
+   net2host = (x == ntohs(x));
+
+   // The easiest case is the one of a DICOM header, since it possesses a
+   // file preamble where it suffice to look for the string "DICM".
+   // The buffer is zeroed first: when the file is shorter than the buffer,
+   // the comparisons below then work on well defined (null) bytes instead
+   // of uninitialised memory.
+   memset(deb, 0, sizeof(deb));
+   size_t lgrLue = fread(deb, 1, sizeof(deb), fp);
+   if (lgrLue < sizeof(deb))
+      dbg.Verbose(1, "gdcmHeader::CheckSwap:",
+                     "file shorter than the header length to read");
+
+   entCur = deb + 128;
+   if(memcmp(entCur, "DICM", (size_t)4) == 0) {
+      dbg.Verbose(1, "gdcmHeader::CheckSwap:", "looks like DICOM Version3");
+      // Next, determine the value representation (VR). Let's skip to the
+      // first element (0002, 0000) and check there if we find "UL"
+      // - or "OB" if the 1st one is (0002,0001) -,
+      // in which case we (almost) know it is explicit VR.
+      // WARNING: if it happens to be implicit VR then what we will read
+      // is the length of the group. If this ascii representation of this
+      // length happens to be "UL" then we shall believe it is explicit VR.
+      // FIXME: in order to fix the above warning, we could read the next
+      // element value (or a couple of elements values) in order to make
+      // sure we are not commiting a big mistake.
+      // We need to skip :
+      // * the 128 bytes of File Preamble (often padded with zeroes),
+      // * the 4 bytes of "DICM" string,
+      // * the 4 bytes of the first tag (0002, 0000),or (0002, 0001)
+      // i.e. a total of 136 bytes.
+      entCur = deb + 136;
+      // FIXME
+      // Use gdcmHeader::dicom_vr to test all the possibilities
+      // instead of just checking for UL, OB and UI !?
+      if( (memcmp(entCur, "UL", (size_t)2) == 0) ||
+          (memcmp(entCur, "OB", (size_t)2) == 0) ||
+          (memcmp(entCur, "UI", (size_t)2) == 0) )
+      {
+         filetype = ExplicitVR;
+         dbg.Verbose(1, "gdcmHeader::CheckSwap:",
+                     "explicit Value Representation");
+      } else {
+         filetype = ImplicitVR;
+         dbg.Verbose(1, "gdcmHeader::CheckSwap:",
+                     "not an explicit Value Representation");
+      }
+      // net2host is true when x == ntohs(x), i.e. when the host byte order
+      // IS the network byte order: the messages below state just that.
+      if (net2host) {
+         sw = 4321;
+         dbg.Verbose(1, "gdcmHeader::CheckSwap:",
+                        "HostByteOrder = NetworkByteOrder");
+      } else {
+         sw = 0;
+         dbg.Verbose(1, "gdcmHeader::CheckSwap:",
+                        "HostByteOrder != NetworkByteOrder");
+      }
+
+      // Position the file position indicator at first tag (i.e.
+      // after the file preamble and the "DICM" string).
+      rewind(fp);
+      fseek (fp, 132L, SEEK_SET);
+      return;
+   } // End of DicomV3
+
+   // Alas, this is not a DicomV3 file and whatever happens there is no file
+   // preamble. We can reset the file position indicator to where the data
+   // is (i.e. the beginning of the file).
+   dbg.Verbose(1, "gdcmHeader::CheckSwap:", "not a DICOM Version3 file");
+   rewind(fp);
+
+   // Our next best chance would be to be considering a 'clean' ACR/NEMA file.
+   // By clean we mean that the length of the first tag is written down.
+   // If this is the case and since the length of the first group HAS to be
+   // four (bytes), then determining the proper swap code is straightforward.
+
+   entCur = deb + 4;
+   // We assume the array of char we are considering contains the binary
+   // representation of a 32 bits integer. The bytes are copied rather than
+   // read through a casted pointer, since deb + 4 carries no guarantee of
+   // being suitably aligned for a guint32.
+   memcpy(&s, entCur, sizeof(s));
+
+   switch (s) {
+      case 0x00040000 :
+         sw = 3412;
+         filetype = ACR;
+         return;
+      case 0x04000000 :
+         sw = 4321;
+         filetype = ACR;
+         return;
+      case 0x00000400 :
+         sw = 2143;
+         filetype = ACR;
+         return;
+      case 0x00000004 :
+         sw = 0;
+         filetype = ACR;
+         return;
+      default :
+         dbg.Verbose(0, "gdcmHeader::CheckSwap:",
+                        "ACR/NEMA unfound swap info (time to raise bets)");
+   }
+
+   // We are out of luck. It is not a DicomV3 nor a 'clean' ACR/NEMA file.
+   // It is time for despaired wild guesses. So, let's assume this file
+   // happens to be 'dirty' ACR/NEMA, i.e. the length of the group is
+   // not present. Then the only info we have is the net2host one.
+   filetype = Unknown;
+   if (! net2host )
+      sw = 0;
+   else
+      sw = 4321;
+   return;
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Replaces the current swap code by its counterpart:
+ *          0 and 4321 are exchanged, so are 3412 and 2143.
+ *          Any other swap code is left untouched.
+ */
+void gdcmHeader::SwitchSwapToBigEndian(void) {
+   dbg.Verbose(1, "gdcmHeader::SwitchSwapToBigEndian",
+                  "Switching to BigEndian mode.");
+   switch ( sw ) {
+      case 0 :
+         sw = 4321;
+         break;
+      case 4321 :
+         sw = 0;
+         break;
+      case 3412 :
+         sw = 2143;
+         break;
+      case 2143 :
+         sw = 3412;
+         break;
+      default :
+         // Not a swap code handled here: nothing to do.
+         break;
+   }
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief   Sets MaxSizeLoadElementValue. A negative NewSize is ignored;
+ *          a NewSize of 0xffffffff or more is clamped to 0xffffffff.
+ * @param   NewSize Requested value.
+ */
+void gdcmHeader::SetMaxSizeLoadElementValue(long NewSize) {
+   if (NewSize < 0)
+      return;
+   // The comparison is made on the full width of a long. Narrowing NewSize
+   // to 32 bits beforehand would let any value of 2^32 or more (64 bits
+   // long) wrap around instead of being clamped.
+   if ((unsigned long)NewSize >= 0xffffffffUL) {
+      MaxSizeLoadElementValue = 0xffffffff;
+      return;
+   }
+   MaxSizeLoadElementValue = NewSize;
+}
+
+/**
+ * \ingroup gdcmHeader
+ * \brief Searches both the public and the shadow dictionary (when they
+ * exist) for the presence of the DictEntry with given
+ * group and element. The public dictionary has precedence on the
+ * shadow one.
+ * @param group group of the searched DictEntry
+ * @param element element of the searched DictEntry
+ * @return Corresponding DictEntry when it exists, NULL otherwise.
+ */
+gdcmDictEntry * gdcmHeader::GetDictEntryByNumber(guint16 group,
+ guint16 element) {
+ gdcmDictEntry * found = (gdcmDictEntry*)0;
+ if (!RefPubDict && !RefShaDict) {
+ dbg.Verbose(0, "gdcmHeader::GetDictEntry",
+ "we SHOULD have a default dictionary");
+ }
+ if (RefPubDict) {
+ found = RefPubDict->GetTagByNumber(group, element);
+ if (found)
+ return found;