X-Git-Url: https://git.creatis.insa-lyon.fr/pubgit/?a=blobdiff_plain;f=src%2FgdcmHeader.cxx;h=3107da2ff3bf31765a6245aedcf5dcc9181e8854;hb=e06d61ba347d7f5f692a682434e089d939f85653;hp=79aa0139dc2654de387de3a8094183e96dd9b559;hpb=88af35455d7d4b056766880c5d3a3b5a7340bd8b;p=gdcm.git diff --git a/src/gdcmHeader.cxx b/src/gdcmHeader.cxx index 79aa0139..3107da2f 100644 --- a/src/gdcmHeader.cxx +++ b/src/gdcmHeader.cxx @@ -1,340 +1,964 @@ -#include "gdcmlib.h" +// gdcmHeader.cxx + +#include "gdcm.h" +#include +// For nthos: +#ifdef _MSC_VER +#include +#else +#include +#endif +#include // for isalpha +#include +#include +#include "gdcmUtil.h" + +#define HEADER_LENGTH_TO_READ 256 // on ne lit plus que le debut + +namespace Error { + struct FileReadError { + FileReadError(FILE* fp, const char* Mesg) { + if (feof(fp)) + dbg.Verbose(1, "EOF encountered :", Mesg); + if (ferror(fp)) + dbg.Verbose(1, "Error on reading :", Mesg); + } + }; +} -/* ======================================================================= +//FIXME: this looks dirty to me... +#define str2num(str, typeNum) *((typeNum *)(str)) - _IdDcmCheckSwap - La seule maniere sure que l'on aie pour determiner - si on est en LITTLE_ENDIAN, BIG-ENDIAN, - BAD-LITTLE-ENDIAN, BAD-BIG-ENDIAN - est de trouver l'element qui donne la longueur d'un 'GROUP' - (on sait que la longueur de cet element vaut 0x00000004) - et de regarder comment cette longueur est codee en memoire +VRHT * gdcmHeader::dicom_vr = (VRHT*)0; +gdcmDictSet* gdcmHeader::Dicts = new gdcmDictSet(); - Le probleme vient de ce que parfois, il n'y en a pas ... +void gdcmHeader::Initialise(void) { + if (!gdcmHeader::dicom_vr) + InitVRDict(); + RefPubDict = gdcmHeader::Dicts->GetDefaultPublicDict(); + RefShaDict = (gdcmDict*)0; +} - On fait alors le pari qu'on a a faire a du LITTLE_ENDIAN propre. - (Ce qui est la norme -pas respectee- depuis ACR-NEMA) - Si ce n'est pas le cas, on ne peut rien faire. +gdcmHeader::gdcmHeader (const char* InFilename) { + SetMaxSizeLoadElementValue(1024); + filename = InFilename; + Initialise(); + fp=fopen(InFilename,"rw"); + dbg.Error(!fp, "gdcmHeader::gdcmHeader cannot open file", InFilename); + ParseHeader(); +} - (il faudrait avoir des fonctions auxquelles - on passe le code Swap en parametre, pour faire des essais 'manuels') +gdcmHeader::~gdcmHeader (void) { + fclose(fp); + return; +} - ======================================================================= */ +void gdcmHeader::InitVRDict (void) { + if (dicom_vr) { + dbg.Verbose(0, "gdcmHeader::InitVRDict:", "VR dictionary allready set"); + return; + } + VRHT *vr = new VRHT; + (*vr)["AE"] = "Application Entity"; // At most 16 bytes + (*vr)["AS"] = "Age String"; // Exactly 4 bytes + (*vr)["AT"] = "Attribute Tag"; // 2 16-bit unsigned short integers + (*vr)["CS"] = "Code String"; // At most 16 bytes + (*vr)["DA"] = "Date"; // Exactly 8 bytes + (*vr)["DS"] = "Decimal String"; // At most 16 bytes + (*vr)["DT"] = "Date Time"; // At most 26 bytes + (*vr)["FL"] = "Floating Point Single"; // 32-bit IEEE 754:1985 float + (*vr)["FD"] = "Floating Point Double"; // 64-bit IEEE 754:1985 double + (*vr)["IS"] = "Integer String"; // At most 12 bytes + (*vr)["LO"] = "Long String"; // At most 64 chars + (*vr)["LT"] = "Long Text"; // At most 10240 chars + (*vr)["OB"] = "Other Byte String"; // String of bytes (vr independant) + (*vr)["OW"] = "Other Word String"; // String of 16-bit words (vr dep) + (*vr)["PN"] = "Person Name"; // At most 64 chars + (*vr)["SH"] = "Short String"; // At most 16 chars + (*vr)["SL"] = "Signed Long"; // Exactly 4 bytes + (*vr)["SQ"] = "Sequence of Items"; // Not Applicable + (*vr)["SS"] = "Signed Short"; // Exactly 2 bytes + (*vr)["ST"] = "Short Text"; // At most 1024 chars + (*vr)["TM"] = "Time"; // At most 16 bytes + (*vr)["UI"] = "Unique Identifier"; // At most 64 bytes + (*vr)["UL"] = "Unsigned Long "; // Exactly 4 bytes + (*vr)["UN"] = "Unknown"; // Any length of bytes + (*vr)["US"] = "Unsigned Short "; // Exactly 2 bytes + (*vr)["UT"] = "Unlimited Text"; // At most 2^32 -1 chars + dicom_vr = vr; +} -EndianType gdcmHeader::gdcmHeader() +/** + * \ingroup gdcmHeader + * \brief Discover what the swap code is (among little endian, big endian, + * bad little endian, bad big endian). + * + */ +void gdcmHeader::CheckSwap() { - //guint32 s; + // The only guaranted way of finding the swap code is to find a + // group tag since we know it's length has to be of four bytes i.e. + // 0x00000004. Finding the swap code in then straigthforward. Trouble + // occurs when we can't find such group... + guint32 s; guint32 x=4; // x : pour ntohs bool net2host; // true when HostByteOrder is the same as NetworkByteOrder - - int sw; + int lgrLue; char * entCur; - char deb[LGR_ENTETE_A_LIRE]; - - // On teste le processeur - if (x==ntohs(x)) { - net2host = true; - } else { - net2host = false; - } - - // On commence par verifier si c'est du DICOM 'actuel' - // ------------- - - lgrLue = fread(deb,1,LGR_ENTETE_A_LIRE,e->fp); - - entCur = deb+128; - if(memcmp(entCur, "DICM", (size_t)4) == 0) { - filetype = TrueDicom; - if (DEBUG) printf ("_IdDcmCheckSwap : C est du DICOM actuel \n"); - } else { - filetype = Unknown; - if (DEBUG) printf ("_IdDcmCheckSwap : Ce n'est PAS du DICOM actuel\n"); - } - - if(filetype == TrueDicom) { - // on saute le File Preamble (souvent a ZERO) : 128 Octets - // + le DICM (4), et le (0002, 0000) soit 4 (136 = 128 + 4 + 4) - entCur = deb+136; - if(memcmp(entCur, "UL", (size_t)2) == 0) { - // les 2 premiers octets de la lgr peuvent valoir UL --> Explicit VR - filetype = ExplicitVR; - if (DEBUG) printf ("_IdDcmCheckSwap : Explicit VR\n"); - } else { - filetype = ImplicitVR; - if (DEBUG) printf ("_IdDcmCheckSwap : PAS Explicit VR\n"); - } - - if (net2host) { // HostByteOrder is different from NetworkByteOrder - sw = 0; // on est sur PC ou DEC --> LITTLE-ENDIAN -> Rien a faire - if (DEBUG) printf("HostByteOrder = NetworkByteOrder\n"); - - } else { /* on est sur une Sun ou une SGI */ - sw = 4321; - if (DEBUG) printf("HostByteOrder != NetworkByteOrder\n"); - } - - rewind(e->fp); - fseek (e->fp, 132L, SEEK_SET); //On se positionne sur le debut des info - e->offsetCourant=132; - return sw; - - } /* fin TrueDicom */ - - // Pas du TrueDicom : permiere hypothese c'est de l'ACR 'propre', auquel - // cas la lgr du premier element du groupe est FORCEMENT 4 - - entCur=deb + 4; - s=str2num(entCur,int); - - switch (s) { - case 0x00040000 : - sw=3412; if(DEBUG) printf("s : %08x sw : %d\n",s,sw); - filetype = ACR; - break; - case 0x04000000 : - sw=4321; if(DEBUG) printf("s : %08x sw : %d\n",s,sw); - filetype = ACR; - break; - case 0x00000400 : - sw=2143; if(DEBUG) printf("s : %08x sw : %d\n",s,sw); - filetype = ACR; - break; - case 0x00000004 : - sw=0; if(DEBUG) printf("s : %08x sw : %d\n",s,sw); - filetype = ACR; - break; - default : - sw = -1; - if (DEBUG) printf (" Pas trouve l info de Swap; On va parier\n"); + char deb[HEADER_LENGTH_TO_READ]; + + // First, compare HostByteOrder and NetworkByteOrder in order to + // determine if we shall need to swap bytes (i.e. the Endian type). + if (x==ntohs(x)) + net2host = true; + else + net2host = false; + + // The easiest case is the one of a DICOM header, since it possesses a + // file preamble where it suffice to look for the sting "DICM". + lgrLue = fread(deb, 1, HEADER_LENGTH_TO_READ, fp); + + entCur = deb + 128; + if(memcmp(entCur, "DICM", (size_t)4) == 0) { + filetype = TrueDicom; + dbg.Verbose(1, "gdcmHeader::CheckSwap:", "looks like DICOM Version3"); + } else { + filetype = Unknown; + dbg.Verbose(1, "gdcmHeader::CheckSwap:", "not a DICOM Version3 file"); } - // Deuxieme hypothese : c'est de l'ACR 'pas propre' i.e. il manque - // la lgr du groupe + if(filetype == TrueDicom) { + // Next, determine the value representation (VR). Let's skip to the + // first element (0002, 0000) and check there if we find "UL", in + // which case we (almost) know it is explicit VR. + // WARNING: if it happens to be implicit VR then what we will read + // is the length of the group. If this ascii representation of this + // length happens to be "UL" then we shall believe it is explicit VR. + // FIXME: in order to fix the above warning, we could read the next + // element value (or a couple of elements values) in order to make + // sure we are not commiting a big mistake. + // We need to skip : + // * the 128 bytes of File Preamble (often padded with zeroes), + // * the 4 bytes of "DICM" string, + // * the 4 bytes of the first tag (0002, 0000), + // i.e. a total of 136 bytes. + entCur = deb + 136; + if(memcmp(entCur, "UL", (size_t)2) == 0) { + filetype = ExplicitVR; + dbg.Verbose(1, "gdcmHeader::CheckSwap:", + "explicit Value Representation"); + } else { + filetype = ImplicitVR; + dbg.Verbose(1, "gdcmHeader::CheckSwap:", + "not an explicit Value Representation"); + } - if(sw==-1) { - /* On n'a pas trouve l'info de swap 28/11/2000 JPR */ - // Si c'est du VRAI ACR NEMA si on est sur une DEC ou un PC swap=0, - // SUN ou SGI SWAP=4321 - /* si c'est du RAW, ca degagera + tard */ + if (net2host) { + sw = 4321; + dbg.Verbose(1, "gdcmHeader::CheckSwap:", + "HostByteOrder != NetworkByteOrder"); + } else { + sw = 0; + dbg.Verbose(1, "gdcmHeader::CheckSwap:", + "HostByteOrder = NetworkByteOrder"); + } + + // Position the file position indicator at first tag (i.e. + // after the file preamble and the "DICM" string). + rewind(fp); + fseek (fp, 132L, SEEK_SET); + return; + } // End of TrueDicom + + // Alas, this is not a DicomV3 file and whatever happens there is no file + // preamble. We can reset the file position indicator to where the data + // is (i.e. the beginning of the file). + rewind(fp); + + // Our next best chance would be to be considering a 'clean' ACR/NEMA file. + // By clean we mean that the length of the first tag is written down. + // If this is the case and since the length of the first group HAS to be + // four (bytes), then determining the proper swap code is straightforward. + + entCur = deb + 4; + s = str2num(entCur, guint32); + + switch (s) { + case 0x00040000 : + sw = 3412; + filetype = ACR; + return; + case 0x04000000 : + sw = 4321; + filetype = ACR; + return; + case 0x00000400 : + sw = 2143; + filetype = ACR; + return; + case 0x00000004 : + sw = 0; + filetype = ACR; + return; + default : + dbg.Verbose(0, "gdcmHeader::CheckSwap:", + "ACR/NEMA unfound swap info (time to raise bets)"); + } - if (DEBUG) printf("On force la chance \n"); + // We are out of luck. It is not a DicomV3 nor a 'clean' ACR/NEMA file. + // It is time for despaired wild guesses. So, let's assume this file + // happens to be 'dirty' ACR/NEMA, i.e. the length of the group is + // not present. Then the only info we have is the net2host one. + if (! net2host ) + sw = 0; + else + sw = 4321; + return; +} - if (x!=ntohs(x)) // HostByteOrder is different from NetworkByteOrder - sw = 0; // on est sur PC ou DEC --> LITTLE-ENDIAN -> Rien a faire - else - sw = 4321; // on est sur Sun ou SGI +void gdcmHeader::SwitchSwapToBigEndian(void) { + dbg.Verbose(1, "gdcmHeader::SwitchSwapToBigEndian", + "Switching to BigEndian mode."); + if ( sw == 0 ) { + sw = 4321; + return; } + if ( sw == 4321 ) { + sw = 0; + return; + } + if ( sw == 3412 ) { + sw = 2143; + return; + } + if ( sw == 2143 ) + sw = 3412; +} + +void gdcmHeader::GetPixels(size_t lgrTotale, void* Pixels) { + size_t pixelsOffset; + pixelsOffset = GetPixelOffset(); + fseek(fp, pixelsOffset, SEEK_SET); + fread(Pixels, 1, lgrTotale, fp); +} - rewind(e->fp); // les info commencent au debut - e->offsetCourant=0; - return (sw); -} - -void gdcmHeader::_setAcrLibido() { - - _ID_DCM_ELEM * ple; - PLIST_ELEMENT plelem; - PLIST pl; - - // Positionnement ACR_LIBIDO - if(DEBUG) printf("Entree ds _setAcrLibido\n"); - - filetype = ACR_LIBIDO = 0; - if ( filetype != TrueDicom) { - // Recognition Code --> n'existe plus en DICOM V3 ... - - pl = e->plist; - plelem = IdLstFirst(pl); - while (plelem) { - ple= IdLstPtrObj(plelem); - if(DEBUG) printf("gr %04x Num %04x\n", ple->Gr, ple->Num); - if(ple->Gr > 0x0008) break; // On a depasse - if(ple->Gr == 0x0008) { - if(ple->Num > 0x0010) break; // On a depasse - if(ple->Num == 0x0010) { - if ( (memcmp(ple->valeurElem,"ACRNEMA_LIBIDO",14)==0) - // si c'est egal - || (memcmp(ple->valeurElem,"CANRME_AILIBOD",14)==0)) { - // en cas d'objet ACRLibido fait sr 1 autre machine) - e->ACR_LIBIDO =1; - } // fin if memcmp - break; - } // fin if ple->Num==0x0010 - } // fin ple->Gr==0x0008 - plelem = IdLstNext(plelem); - } // fin while - } // fin if TrueDicom - - return; -} - -/* ======================================================================= -* _IdDcmRecupLgr -* -* ACR-NEMA : On a toujours -* GroupNumber (2 Octets) -* ElementNumber (2 Octets) -* ElementSize (4 Octets) -* -* -* DICOM : On peut avoir (implicit Value Representation) -* GroupNumber (2 Octets) -* ElementNumber (2 Octets) -* ElementSize (4 Octets) -* -* On peut avoir (explicit Value Representation) -* GroupNumber (2 Octets) -* ElementNumber (2 Octets) -* ValueRepresentation (2 Octets) -* ElementSize (2 Octets) -* -* ATTENTION : dans le cas ou ValueRepresentation = OB, OW, SQ, UN -* GroupNumber (2 Octets) -* ElementNumber (2 Octets) -* ValueRepresentation (2 Octets) -* zone reservee (2 Octets) -* ElementSize (4 Octets) -* -* -* ======================================================================= */ /** - * \ingroup dcm - * \brief recupere la longueur d'un champ DICOM. - * (le fichier doit deja avoir ete ouvert, - * _IdAcrCheckSwap(ID_DCM_HDR *e) avoir ete appele) - * et la partie 'group' ainsi que la partie 'elem' - * de l'acr_element doivent avoir ete lues. - * @param sw code swap - * @param skippedLength pointeur sur nombre d'octets que l'on a saute qd la lecture est finie - * @param longueurLue pointeur sur longueur (en nombre d'octets) effectivement lue - - * @return longueur retenue pour le champ + * \ingroup gdcmHeader + * \brief Find the value representation of the current tag. + * + * @param sw code swap + * @param skippedLength pointeur sur nombre d'octets que l'on a saute qd + * la lecture est finie + * @param longueurLue pointeur sur longueur (en nombre d'octets) + * effectivement lue + * @return longueur retenue pour le champ */ -static guint32 _IdDcmRecupLgr(ID_DCM_HDR *e, int sw, int *skippedLength, int *longueurLue) { -guint32 l_gr; -unsigned short int l_gr_2; -int i, trouve; -char VR[5]; -int lgrLue; +void gdcmHeader::FindVR( ElValue *ElVal) { + if (filetype != ExplicitVR) + return; -/* - * ATTENTION : -*/ + char VR[3]; + string vr; + int lgrLue; + long PositionOnEntry = ftell(fp); + // Warning: we believe this is explicit VR (Value Representation) because + // we used a heuristic that found "UL" in the first tag. Alas this + // doesn't guarantee that all the tags will be in explicit VR. In some + // cases (see e-film filtered files) one finds implicit VR tags mixed + // within an explicit VR file. Hence we make sure the present tag + // is in explicit VR and try to fix things if it happens not to be + // the case. + bool RealExplicit = true; + + lgrLue=fread (&VR, (size_t)2,(size_t)1, fp); + VR[2]=0; + vr = string(VR); + + // Assume we are reading a falsely explicit VR file i.e. we reached + // a tag where we expect reading a VR but are in fact we read the + // first to bytes of the length. Then we will interogate (through find) + // the dicom_vr dictionary with oddities like "\004\0" which crashes + // both GCC and VC++ implementations of the STL map. Hence when the + // expected VR read happens to be non-ascii characters we consider + // we hit falsely explicit VR tag. + + if ( (!isalpha(VR[0])) && (!isalpha(VR[1])) ) + RealExplicit = false; + + // CLEANME searching the dicom_vr at each occurence is expensive. + // PostPone this test in an optional integrity check at the end + // of parsing or only in debug mode. + if ( RealExplicit && !dicom_vr->count(vr) ) + RealExplicit = false; + + if ( RealExplicit ) { + if ( ElVal->IsVrUnknown() ) { + // When not a dictionary entry, we can safely overwrite the vr. + ElVal->SetVR(vr); + return; + } + if ( ElVal->GetVR() == vr ) { + // The vr we just read and the dictionary agree. Nothing to do. + return; + } + // The vr present in the file and the dictionary disagree. We assume + // the file writer knew best and use the vr of the file. Since it would + // be unwise to overwrite the vr of a dictionary (since it would + // compromise it's next user), we need to clone the actual DictEntry + // and change the vr for the read one. + gdcmDictEntry* NewTag = new gdcmDictEntry(ElVal->GetGroup(), + ElVal->GetElement(), + vr, + "FIXME", + ElVal->GetName()); + ElVal->SetDictEntry(NewTag); + return; + } + + // We thought this was explicit VR, but we end up with an + // implicit VR tag. Let's backtrack. + dbg.Verbose(1, "gdcmHeader::FindVR:", "Falsely explicit vr file"); + fseek(fp, PositionOnEntry, SEEK_SET); + // When this element is known in the dictionary we shall use, e.g. for + // the semantics (see the usage of IsAnInteger), the vr proposed by the + // dictionary entry. Still we have to flag the element as implicit since + // we know now our assumption on expliciteness is not furfilled. + // avoid . + if ( ElVal->IsVrUnknown() ) + ElVal->SetVR("Implicit"); + ElVal->SetImplicitVr(); +} -int nbCode=26; // nombre d'elements dans la table de type DICOM_VR definie dans dicom.c +/** + * \ingroup gdcmHeader + * \brief Determines if the Transfer Syntax was allready encountered + * and if it corresponds to a Big Endian one. + * + * @return True when big endian found. False in all other cases. + */ +bool gdcmHeader::IsBigEndianTransferSyntax(void) { + ElValue* Element = PubElVals.GetElementByNumber(0x0002, 0x0010); + if ( !Element ) + return false; + LoadElementValueSafe(Element); + string Transfer = Element->GetValue(); + if ( Transfer == "1.2.840.10008.1.2.2" ) + return true; + return false; +} -/* ================ */ +void gdcmHeader::FixFoundLength(ElValue * ElVal, guint32 FoudLength) { + // Heuristic: a final fix. + if ( FoudLength == 0xffffffff) + FoudLength = 0; + ElVal->SetLength(FoudLength); +} -// ID_DCM_HDR *e sert uniquement de passe-plat pour __ExplicitVR +guint32 gdcmHeader::FindLengthOB(void) { + // See PS 3.5-2001, section A.4 p. 49 on encapsulation of encoded pixel data. + guint16 g; + guint16 n; + long PositionOnEntry = ftell(fp); + bool FoundSequenceDelimiter = false; + guint32 TotalLength = 0; + guint32 ItemLength; + + while ( ! FoundSequenceDelimiter) { + g = ReadInt16(); + n = ReadInt16(); + TotalLength += 4; // We even have to decount the group and element + if ( g != 0xfffe ) { + dbg.Verbose(1, "gdcmHeader::FindLengthOB: ", + "wrong group for an item sequence."); + throw Error::FileReadError(fp, "gdcmHeader::FindLengthOB"); + } + if ( n == 0xe0dd ) + FoundSequenceDelimiter = true; + else if ( n != 0xe000) { + dbg.Verbose(1, "gdcmHeader::FindLengthOB: ", + "wrong element for an item sequence."); + throw Error::FileReadError(fp, "gdcmHeader::FindLengthOB"); + } + ItemLength = ReadInt32(); + TotalLength += ItemLength + 4; // We add 4 bytes since we just read + // the ItemLength with ReadInt32 + SkipBytes(ItemLength); + } + fseek(fp, PositionOnEntry, SEEK_SET); + return TotalLength; +} +void gdcmHeader::FindLength(ElValue * ElVal) { + guint16 element = ElVal->GetElement(); + string vr = ElVal->GetVR(); + guint16 length16; + + if ( (filetype == ExplicitVR) && ! ElVal->IsImplicitVr() ) { + + if ( (vr=="OB") || (vr=="OW") || (vr=="SQ") || (vr=="UN") ) { + // The following reserved two bytes (see PS 3.5-2001, section + // 7.1.2 Data element structure with explicit vr p27) must be + // skipped before proceeding on reading the length on 4 bytes. + fseek(fp, 2L, SEEK_CUR); + guint32 length32 = ReadInt32(); + if ( (vr == "OB") && (length32 == 0xffffffff) ) { + ElVal->SetLength(FindLengthOB()); + return; + } + FixFoundLength(ElVal, length32); + return; + } -if (e->__ExplicitVR == 1) { - lgrLue=fread (&VR, (size_t)2,(size_t)1, e->fp); - VR[2]=0; + // Length is encoded on 2 bytes. + length16 = ReadInt16(); + + // We can tell the current file is encoded in big endian (like + // Data/US-RGB-8-epicard) when we find the "Transfer Syntax" tag + // and it's value is the one of the encoding of a big endian file. + // In order to deal with such big endian encoded files, we have + // (at least) two strategies: + // * when we load the "Transfer Syntax" tag with value of big endian + // encoding, we raise the proper flags. Then we wait for the end + // of the META group (0x0002) among which is "Transfer Syntax", + // before switching the swap code to big endian. We have to postpone + // the switching of the swap code since the META group is fully encoded + // in little endian, and big endian coding only starts at the next + // group. The corresponding code can be hard to analyse and adds + // many additional unnecessary tests for regular tags. + // * the second strategy consist in waiting for trouble, that shall appear + // when we find the first group with big endian encoding. This is + // easy to detect since the length of a "Group Length" tag (the + // ones with zero as element number) has to be of 4 (0x0004). When we + // encouter 1024 (0x0400) chances are the encoding changed and we + // found a group with big endian encoding. + // We shall use this second strategy. In order make sure that we + // can interpret the presence of an apparently big endian encoded + // length of a "Group Length" without committing a big mistake, we + // add an additional check: we look in the allready parsed elements + // for the presence of a "Transfer Syntax" whose value has to be "big + // endian encoding". When this is the case, chances are we got our + // hands on a big endian encoded file: we switch the swap code to + // big endian and proceed... + if ( (element == 0x000) && (length16 == 0x0400) ) { + if ( ! IsBigEndianTransferSyntax() ) + throw Error::FileReadError(fp, "gdcmHeader::FindLength"); + length16 = 4; + SwitchSwapToBigEndian(); + // Restore the unproperly loaded values i.e. the group, the element + // and the dictionary entry depending on them. + guint16 CorrectGroup = SwapShort(ElVal->GetGroup()); + guint16 CorrectElem = SwapShort(ElVal->GetElement()); + gdcmDictEntry * NewTag = IsInDicts(CorrectGroup, CorrectElem); + if (!NewTag) { + // This correct tag is not in the dictionary. Create a new one. + NewTag = new gdcmDictEntry(CorrectGroup, CorrectElem); + } + // FIXME this can create a memory leaks on the old entry that be + // left unreferenced. + ElVal->SetDictEntry(NewTag); + } + + // Heuristic: well some files are really ill-formed. + if ( length16 == 0xffff) { + length16 = 0; + dbg.Verbose(0, "gdcmHeader::FindLength", + "Erroneous element length fixed."); + } + FixFoundLength(ElVal, (guint32)length16); + return; + } - // ATTENTION : - // Ce n'est pas parce qu'on a trouve UL la premiere fois qu'on respecte - // Explicit VR tout le temps - // (cf e=film ...) + // Either implicit VR or a non DICOM conformal (see not below) explicit + // VR that ommited the VR of (at least) this element. Farts happen. + // [Note: according to the part 5, PS 3.5-2001, section 7.1 p25 + // on Data elements "Implicit and Explicit VR Data Elements shall + // not coexist in a Data Set and Data Sets nested within it".] + // Length is on 4 bytes. + FixFoundLength(ElVal, ReadInt32()); +} - for(i=0,trouve=0;ipleCourant)->VR=_ID_dicom_vr[i].dicom_VR; - trouve=1; - break; - } +/** + * \ingroup gdcmHeader + * \brief Swaps back the bytes of 4-byte long integer accordingly to + * processor order. + * + * @return The suggested integer. + */ +guint32 gdcmHeader::SwapLong(guint32 a) { + // FIXME: il pourrait y avoir un pb pour les entiers negatifs ... + switch (sw) { + case 0 : + break; + case 4321 : + a=( ((a<<24) & 0xff000000) | ((a<<8) & 0x00ff0000) | + ((a>>8) & 0x0000ff00) | ((a>>24) & 0x000000ff) ); + break; + + case 3412 : + a=( ((a<<16) & 0xffff0000) | ((a>>16) & 0x0000ffff) ); + break; + + case 2143 : + a=( ((a<<8) & 0xff00ff00) | ((a>>8) & 0x00ff00ff) ); + break; + default : + dbg.Error(" gdcmHeader::SwapLong : unset swap code"); + a=0; } + return(a); +} - if ( trouve == 0) { +/** + * \ingroup gdcmHeader + * \brief Swaps the bytes so they agree with the processor order + * @return The properly swaped 16 bits integer. + */ +guint16 gdcmHeader::SwapShort(guint16 a) { + if ( (sw==4321) || (sw==2143) ) + a =(((a<<8) & 0x0ff00) | ((a>>8)&0x00ff)); + return (a); +} - // On est mal : implicit VR repere - // mais ce n'est pas un code connu ... - // On reconstitue la longueur - - if(DEBUG) printf("IdDcmRecupLgr : Explicit VR, mais pas trouve de code connu\n"); - memcpy(&l_gr, VR,(size_t)2); +void gdcmHeader::SkipBytes(guint32 NBytes) { + //FIXME don't dump the returned value + (void)fseek(fp, (long)NBytes, SEEK_CUR); +} - lgrLue=fread ( ((char*)&l_gr)+2, (size_t)2, (size_t)1, e->fp); +void gdcmHeader::SkipElementValue(ElValue * ElVal) { + SkipBytes(ElVal->GetLength()); +} - if(sw) l_gr = _IdDcmSWAP_LONG(((guint32)l_gr),sw); - - if(DEBUG) printf("IdDcmRecupLgr : lgr deduite : %08x , %d\n",l_gr,l_gr); +void gdcmHeader::SetMaxSizeLoadElementValue(long NewSize) { + if (NewSize < 0) + return; + if ((guint32)NewSize >= (guint32)0xffffffff) { + MaxSizeLoadElementValue = 0xffffffff; + return; + } + MaxSizeLoadElementValue = NewSize; +} + +/** + * \ingroup gdcmHeader + * \brief Loads the element if it's size is not to big. + * @param ElVal Element whose value shall be loaded. + * @param MaxSize Size treshold above which the element value is not + * loaded in memory. The element value is allways loaded + * when MaxSize is equal to UINT32_MAX. + * @return + */ +void gdcmHeader::LoadElementValue(ElValue * ElVal) { + size_t item_read; + guint16 group = ElVal->GetGroup(); + guint16 elem = ElVal->GetElement(); + string vr = ElVal->GetVR(); + guint32 length = ElVal->GetLength(); + bool SkipLoad = false; + + fseek(fp, (long)ElVal->GetOffset(), SEEK_SET); + + // Sequences not treated yet ! + // + // Ne faudrait-il pas au contraire trouver immediatement + // une maniere 'propre' de traiter les sequences (vr = SQ) + // car commencer par les ignorer risque de conduire a qq chose + // qui pourrait ne pas etre generalisable + // + if( vr == "SQ" ) + SkipLoad = true; + + // Heuristic : a sequence "contains" a set of tags (called items). It looks + // like the last tag of a sequence (the one that terminates the sequence) + // has a group of 0xfffe (with a dummy length). + if( group == 0xfffe ) + SkipLoad = true; + + // The group length doesn't represent data to be loaded in memory, since + // each element of the group shall be loaded individualy. + if( elem == 0 ) + SkipLoad = true; + + if ( SkipLoad ) { + // FIXME the following skip is not necessary + SkipElementValue(ElVal); + ElVal->SetLength(0); + ElVal->SetValue("gdcm::Skipped"); + return; + } + + // When the length is zero things are easy: + if ( length == 0 ) { + ElVal->SetValue(""); + return; + } + + // Values bigger than specified are not loaded. + // + // En fait, c'est les elements dont la longueur est superieure + // a celle fixee qui ne sont pas charges + // + if (length > MaxSizeLoadElementValue) { + ostringstream s; + s << "gdcm::NotLoaded."; + s << " Address:" << (long)ElVal->GetOffset(); + s << " Length:" << ElVal->GetLength(); + //mesg += " Length:" + ElVal->GetLength(); + ElVal->SetValue(s.str()); + return; + } + + // When an integer is expected, read and convert the following two or + // four bytes properly i.e. as an integer as opposed to a string. + if ( IsAnInteger(ElVal) ) { + guint32 NewInt; + if( length == 2 ) { + NewInt = ReadInt16(); + } else if( length == 4 ) { + NewInt = ReadInt32(); + } else + dbg.Error(true, "LoadElementValue: Inconsistency when reading Int."); - *longueurLue=l_gr; - if ( (int)l_gr == -1) { - l_gr=0; - } - *skippedLength = 4; - if (DEBUG) printf(" 1 : lgr %08x (%d )skippedLength %d\n",l_gr,l_gr, *skippedLength); - return(l_gr); + //FIXME: make the following an util fonction + ostringstream s; + s << NewInt; + ElVal->SetValue(s.str()); + return; + } + + // FIXME The exact size should be length if we move to strings or whatever + char* NewValue = (char*)malloc(length+1); + if( !NewValue) { + dbg.Verbose(1, "LoadElementValue: Failed to allocate NewValue"); + return; + } + NewValue[length]= 0; + + item_read = fread(NewValue, (size_t)length, (size_t)1, fp); + if ( item_read != 1 ) { + free(NewValue); + Error::FileReadError(fp, "gdcmHeader::LoadElementValue"); + ElVal->SetValue("gdcm::UnRead"); + return; } + ElVal->SetValue(NewValue); +} + +/** + * \ingroup gdcmHeader + * \brief Loads the element while preserving the current + * underlying file position indicator as opposed to + * to LoadElementValue that modifies it. + * @param ElVal Element whose value shall be loaded. + * @return + */ +void gdcmHeader::LoadElementValueSafe(ElValue * ElVal) { + long PositionOnEntry = ftell(fp); + LoadElementValue(ElVal); + fseek(fp, PositionOnEntry, SEEK_SET); +} - // On repart dans la sequence 'sensee' - if(DEBUG) printf("VR : [%01x , %01x] (%c%c) en position %d du tableau\n", VR[0],VR[1],VR[0],VR[1],i); - //printf(" %d , %s\n", i,_ID_dicom_vr[i].dicom_VR); +guint16 gdcmHeader::ReadInt16(void) { + guint16 g; + size_t item_read; + item_read = fread (&g, (size_t)2,(size_t)1, fp); + if ( item_read != 1 ) + throw Error::FileReadError(fp, "gdcmHeader::ReadInt16"); + g = SwapShort(g); + return g; +} + +guint32 gdcmHeader::ReadInt32(void) { + guint32 g; + size_t item_read; + item_read = fread (&g, (size_t)4,(size_t)1, fp); + if ( item_read != 1 ) + throw Error::FileReadError(fp, "gdcmHeader::ReadInt32"); + g = SwapLong(g); + return g; +} + +/** + * \ingroup gdcmHeader + * \brief Read the next tag without loading it's value + * @return On succes the newly created ElValue, NULL on failure. + */ + +ElValue * gdcmHeader::ReadNextElement(void) { + guint16 g; + guint16 n; + ElValue * NewElVal; - if ( - (!memcmp( VR,"OB",(size_t)2 )) || - (!memcmp( VR,"OW",(size_t)2 )) || - (!memcmp( VR,"SQ",(size_t)2 )) || - (!memcmp( VR,"UN",(size_t)2 )) ) { + try { + g = ReadInt16(); + n = ReadInt16(); + } + catch ( Error::FileReadError ) { + // We reached the EOF (or an error occured) and header parsing + // has to be considered as finished. + return (ElValue *)0; + } - // les 2 octets suivants sont reserves + // Find out if the tag we encountered is in the dictionaries: + gdcmDictEntry * NewTag = IsInDicts(g, n); + if (!NewTag) + NewTag = new gdcmDictEntry(g, n); - if(DEBUG) printf("IdDcmRecupLgr : les 2 octets suivants sont reserves\n"); - //on les saute - fseek(e->fp, 2L,SEEK_CUR); - - //on lit la lgr sur QUATRE octets + NewElVal = new ElValue(NewTag); + if (!NewElVal) { + dbg.Verbose(1, "ReadNextElement: failed to allocate ElValue"); + return (ElValue*)0; + } - lgrLue=fread (&l_gr, (size_t)4,(size_t)1, e->fp); + FindVR(NewElVal); + try { FindLength(NewElVal); } + catch ( Error::FileReadError ) { // Call it quits + return (ElValue *)0; + } + NewElVal->SetOffset(ftell(fp)); + return NewElVal; +} - if(sw) l_gr = _IdDcmSWAP_LONG(((guint32)l_gr),sw); - *skippedLength = 8; +bool gdcmHeader::IsAnInteger(ElValue * ElVal) { + guint16 group = ElVal->GetGroup(); + guint16 element = ElVal->GetElement(); + string vr = ElVal->GetVR(); + guint32 length = ElVal->GetLength(); + + // When we have some semantics on the element we just read, and if we + // a priori know we are dealing with an integer, then we shall be + // able to swap it's element value properly. + if ( element == 0 ) { // This is the group length of the group + if (length == 4) + return true; + else + dbg.Error("gdcmHeader::IsAnInteger", + "Erroneous Group Length element length."); + } + + if ( group % 2 != 0 ) + // We only have some semantics on documented elements, which are + // the even ones. + return false; + + if ( (length != 4) && ( length != 2) ) + // Swapping only make sense on integers which are 2 or 4 bytes long. + return false; + + if ( (vr == "UL") || (vr == "US") || (vr == "SL") || (vr == "SS") ) + return true; + + if ( (group == 0x0028) && (element == 0x0005) ) + // This tag is retained from ACR/NEMA + // CHECKME Why should "Image Dimensions" be a single integer ? + // + // "Image Dimensions", c'est en fait le 'nombre de dimensions' + // de l'objet ACR-NEMA stocké + // 1 : Signal + // 2 : Image + // 3 : Volume + // 4 : Sequence + // + // DICOM V3 ne retient pas cette information + // Par defaut, tout est 'Image', + // C'est a l'utilisateur d'explorer l'ensemble des entetes + // pour savoir à quoi il a a faire + // + // Le Dicom Multiframe peut etre utilise pour stocker, + // dans un seul fichier, une serie temporelle (cardio vasculaire GE, p.ex) + // ou un volume (medecine Nucleaire, p.ex) + // + return true; + + if ( (group == 0x0028) && (element == 0x0200) ) + // This tag is retained from ACR/NEMA + return true; + + return false; +} +/** + * \ingroup gdcmHeader + * \brief Recover the offset (from the beginning of the file) of the pixels. + */ +size_t gdcmHeader::GetPixelOffset(void) { + // If this file complies with the norm we should encounter the + // "Image Location" tag (0x0028, 0x0200). This tag contains the + // the group that contains the pixel data (hence the "Pixel Data" + // is found by indirection through the "Image Location"). + // Inside the group pointed by "Image Location" the searched element + // is conventionally the element 0x0010 (when the norm is respected). + // When the "Image Location" is absent we default to group 0x7fe0. + guint16 grPixel; + guint16 numPixel; + string ImageLocation = GetPubElValByName("Image Location"); + if ( ImageLocation == "UNFOUND" ) { + grPixel = 0x7fe0; } else { - //on lit la lgr sur DEUX octets + grPixel = (guint16) atoi( ImageLocation.c_str() ); + } + if (grPixel != 0x7fe0) + // FIXME is this still necessary ? + // Now, this looks like an old dirty fix for Philips imager + numPixel = 0x1010; + else + numPixel = 0x0010; + ElValue* PixelElement = PubElVals.GetElementByNumber(grPixel, numPixel); + if (PixelElement) + return PixelElement->GetOffset(); + else + return 0; +} + +gdcmDictEntry * gdcmHeader::IsInDicts(guint32 group, guint32 element) { + // + // Y a-t-il une raison de lui passer des guint32 + // alors que group et element sont des guint16? + // + gdcmDictEntry * found = (gdcmDictEntry*)0; + if (!RefPubDict && !RefShaDict) { + //FIXME build a default dictionary ! + printf("FIXME in gdcmHeader::IsInDicts\n"); + } + if (RefPubDict) { + found = RefPubDict->GetTag(group, element); + if (found) + return found; + } + if (RefShaDict) { + found = RefShaDict->GetTag(group, element); + if (found) + return found; + } + return found; +} - lgrLue=fread (&l_gr_2, (size_t)2,(size_t)1, e->fp); +list * gdcmHeader::GetPubTagNames(void) { + list * Result = new list; + TagHT entries = RefPubDict->GetEntries(); - if(sw) l_gr_2 = _IdDcmSWAP_SHORT((unsigned short)l_gr_2,sw); - - *longueurLue=l_gr_2; + for (TagHT::iterator tag = entries.begin(); tag != entries.end(); ++tag){ + Result->push_back( tag->second->GetName() ); + } + return Result; +} - - if ( l_gr_2 == 0xffff) { - l_gr = 0; - } else { - l_gr = l_gr_2; - } - *skippedLength = 4; - } - } else { // Explicit VR = 0 - - //on lit la lgr sur QUATRE octets - - lgrLue=fread (&l_gr, (size_t)4,(size_t)1, e->fp); - - if(sw)l_gr=_IdDcmSWAP_LONG(((long)l_gr),sw); - *skippedLength = 4; - } - - *longueurLue=l_gr; - - // Traitement des curiosites sur la longueur - - if ( (int)l_gr == 0xffffffff) - l_gr=0; +map > * gdcmHeader::GetPubTagNamesByCategory(void) { + map > * Result = new map >; + TagHT entries = RefPubDict->GetEntries(); + + for (TagHT::iterator tag = entries.begin(); tag != entries.end(); ++tag){ + (*Result)[tag->second->GetFourth()].push_back(tag->second->GetName()); + } + return Result; +} + +string gdcmHeader::GetPubElValByNumber(guint16 group, guint16 element) { + return PubElVals.GetElValueByNumber(group, element); +} + +string gdcmHeader::GetPubElValRepByNumber(guint16 group, guint16 element) { + ElValue* elem = PubElVals.GetElementByNumber(group, element); + if ( !elem ) + return "gdcm::Unfound"; + return elem->GetVR(); +} + +string gdcmHeader::GetPubElValByName(string TagName) { + return PubElVals.GetElValueByName(TagName); +} + +string gdcmHeader::GetPubElValRepByName(string TagName) { + ElValue* elem = PubElVals.GetElementByName(TagName); + if ( !elem ) + return "gdcm::Unfound"; + return elem->GetVR(); +} + +string gdcmHeader::GetShaElValByNumber(guint16 group, guint16 element) { + return ShaElVals.GetElValueByNumber(group, element); +} + +string gdcmHeader::GetShaElValRepByNumber(guint16 group, guint16 element) { + ElValue* elem = ShaElVals.GetElementByNumber(group, element); + if ( !elem ) + return "gdcm::Unfound"; + return elem->GetVR(); +} + +string gdcmHeader::GetShaElValByName(string TagName) { + return ShaElVals.GetElValueByName(TagName); +} + +string gdcmHeader::GetShaElValRepByName(string TagName) { + ElValue* elem = ShaElVals.GetElementByName(TagName); + if ( !elem ) + return "gdcm::Unfound"; + return elem->GetVR(); +} + + +string gdcmHeader::GetElValByNumber(guint16 group, guint16 element) { + string pub = GetPubElValByNumber(group, element); + if (pub.length()) + return pub; + return GetShaElValByNumber(group, element); +} + +string gdcmHeader::GetElValRepByNumber(guint16 group, guint16 element) { + string pub = GetPubElValRepByNumber(group, element); + if (pub.length()) + return pub; + return GetShaElValRepByNumber(group, element); +} + +string gdcmHeader::GetElValByName(string TagName) { + string pub = GetPubElValByName(TagName); + if (pub.length()) + return pub; + return GetShaElValByName(TagName); +} + +string gdcmHeader::GetElValRepByName(string TagName) { + string pub = GetPubElValRepByName(TagName); + if (pub.length()) + return pub; + return GetShaElValRepByName(TagName); +} + +/** + * \ingroup gdcmHeader + * \brief Parses the header of the file but does NOT load element values. + */ +void gdcmHeader::ParseHeader(void) { + ElValue * newElValue = (ElValue *)0; - if(!memcmp( VR,"SQ",(size_t)2 )) { // ca annonce une SEQUENCE d'items ?! - l_gr=0; // on lira donc les items de la sequence - if (DEBUG) printf(" SQ trouve : lgr %d \n",l_gr); - } - -if (DEBUG) printf(" 2 : lgr %08x (%d) skippedLength %d\n",l_gr,l_gr, *skippedLength); - return(l_gr); + rewind(fp); + CheckSwap(); + while ( (newElValue = ReadNextElement()) ) { + SkipElementValue(newElValue); + PubElVals.Add(newElValue); + } +} + +/** + * \ingroup gdcmHeader + * \brief Loads the element values of all the elements present in the + * public tag based hash table. + */ +void gdcmHeader::LoadElements(void) { + rewind(fp); + TagElValueHT ht = PubElVals.GetTagHt(); + for (TagElValueHT::iterator tag = ht.begin(); tag != ht.end(); ++tag) + LoadElementValue(tag->second); +} + +void gdcmHeader::PrintPubElVal(ostream & os) { + PubElVals.Print(os); +} + +void gdcmHeader::PrintPubDict(ostream & os) { + RefPubDict->Print(os); }