7 #include <netinet/in.h>
9 #include <cctype> // for isalpha
// Size, in bytes, of the buffer filled from the start of the file by the
// fread() call in CheckSwap.
14 #define HEADER_LENGHT_TO_READ 256 // only the beginning of the file is read now
// Object built (and thrown by ReadInt16, ReadInt32 and FindLength) when a
// read on fp fails. Its constructor only logs a message at verbosity level 1.
// Mesg is the text appended to the log line; callers pass their own name.
17 struct FileReadError {
18 FileReadError(FILE* fp, const char* Mesg) {
// NOTE(review): the test choosing between the two messages below is not
// visible in this excerpt; presumably it checks fp for end-of-file.
20 dbg.Verbose(1, "EOF encountered :", Mesg);
22 dbg.Verbose(1, "Error on reading :", Mesg);
// Reads the bytes found at `str` as a value of type `typeNum`: no byte
// swapping and no conversion are performed, the result is an lvalue.
// The expansion is fully parenthesised so that it composes correctly with
// postfix operators (`.member`, `[i]`) and with surrounding expressions.
// FIXME: this is still a raw pointer cast, so `str` must be suitably aligned
// for `typeNum` and the access is subject to the strict-aliasing rules.
#define str2num(str, typeNum) (*((typeNum *)(str)))
// Static VR table shared by every gdcmHeader. It starts out null; Initialise
// tests it and FindVR queries it through count().
30 VRHT * gdcmHeader::dicom_vr = (VRHT*)0;
// Static dictionary set, allocated during static initialisation; Initialise
// takes its default public dictionary from it.
// NOTE(review): no matching delete is visible in this excerpt.
31 gdcmDictSet* gdcmHeader::Dicts = new gdcmDictSet();
// Selects the dictionaries this header uses for tag look-ups and checks
// whether the shared VR table still has to be set up.
33 void gdcmHeader::Initialise(void) {
// dicom_vr is null until the table has been built.
// NOTE(review): the statement guarded by this test is not visible in this
// excerpt (presumably a call to InitVRDict).
34 if (!gdcmHeader::dicom_vr)
// The public dictionary is the default one of the shared dictionary set.
36 RefPubDict = gdcmHeader::Dicts->GetDefaultPublicDict();
// No second dictionary is attached at this point.
37 RefShaDict = (gdcmDict*)0;
// Builds a header reader on the file named InFilename: stores the name and
// opens the file for reading.
// @param InFilename path of the file to open; the result of the open is
//        handed to dbg.Error together with the file name.
40 gdcmHeader::gdcmHeader (const char* InFilename) {
41 filename = InFilename;
// The stream is only read from (fread/fseek/ftell in the rest of this class),
// so open it read-only and in binary mode: "rw" is not a mode string defined
// for fopen(), and text mode would alter binary data on platforms that
// translate line endings.
43 fp=fopen(InFilename,"rb");
44 dbg.Error(!fp, "gdcmHeader::gdcmHeader cannot open file", InFilename);
// Destructor.
// NOTE(review): the body is not visible in this excerpt; confirm that the
// FILE* opened by the constructor is closed here.
48 gdcmHeader::~gdcmHeader (void) {
// Fills the VR table: each two-letter Value Representation code is mapped to
// its human readable description. The trailing comments give the size
// constraint attached to each VR.
// NOTE(review): the declaration of vr and the statement storing it are not
// visible in this excerpt; presumably the result ends up in dicom_vr.
53 void gdcmHeader::InitVRDict (void) {
// Logged when the table was already built (the guarding test is not visible).
55 dbg.Verbose(0, "gdcmHeader::InitVRDict:", "VR dictionary allready set");
59 (*vr)["AE"] = "Application Entity"; // 16 chars max
60 (*vr)["AS"] = "Age String"; // 4 chars, fixed
61 (*vr)["AT"] = "Attribute Tag"; // 2 unsigned short int
62 (*vr)["CS"] = "Code String"; // 16 chars max
63 (*vr)["DA"] = "Date"; // 8 chars, fixed
64 (*vr)["DS"] = "Decimal String"; // binary coded decimal, 16 max
65 (*vr)["DT"] = "Date Time"; // 26 chars max
66 (*vr)["FL"] = "Floating Point Single"; // 4 bytes, IEEE 754:1985
67 (*vr)["FD"] = "Floating Point Double"; // 8 bytes, IEEE 754:1985
68 (*vr)["IS"] = "Integer String"; // in external format, 12 max
69 (*vr)["LO"] = "Long String"; // 64 bytes max
70 (*vr)["LT"] = "Long Text"; // 10240 max
71 (*vr)["OB"] = "Other Byte String";
72 (*vr)["OW"] = "Other Word String";
73 (*vr)["PN"] = "Person Name";
74 (*vr)["SH"] = "Short String"; // 16 chars max
75 (*vr)["SL"] = "Signed Long";
76 (*vr)["SQ"] = "Sequence of Items"; // Not Applicable
77 (*vr)["SS"] = "Signed Short"; // 2 bytes
78 (*vr)["ST"] = "Short Text"; // 1024 chars max
79 (*vr)["TM"] = "Time"; // 16 chars max
80 (*vr)["UI"] = "Unique Identifier"; // 64 chars max
81 (*vr)["UN"] = "Unknown";
82 (*vr)["UT"] = "Unlimited Text"; // 2^32 - 1 chars max
83 (*vr)["UL"] = "Unsigned Long "; // 4 bytes, fixed
84 (*vr)["US"] = "Unsigned Short "; // 2 bytes, fixed
90 * \brief The only reliable way we have to determine
91 * whether the file is LITTLE_ENDIAN, BIG-ENDIAN,
92 * BAD-LITTLE-ENDIAN or BAD-BIG-ENDIAN
93 * is to find the element that gives the length of a 'GROUP'
94 * (the length of that element is known to be 0x00000004)
95 * and to look at how this length is encoded in memory.
97 * The trouble is that sometimes there is no such element ...
99 * We then bet that we are dealing with clean LITTLE_ENDIAN
100 * (which has been the standard, though not always followed, since ACR-NEMA).
101 * If that is not the case, there is nothing we can do.
103 * (we would need functions that take
104 * the swap code as a parameter, in order to try things out 'by hand')
// Probes the first HEADER_LENGHT_TO_READ bytes of the file to classify it
// (DICOM V3 with explicit or implicit VR, otherwise ACR/NEMA) and, as the
// comments below describe, to settle the byte-swap code. Several statements
// of the original body are not visible in this excerpt.
106 void gdcmHeader::CheckSwap()
109 guint32 x=4; // x: used for ntohs
110 bool net2host; // true when HostByteOrder is the same as NetworkByteOrder
// Buffer receiving the start of the file.
114 char deb[HEADER_LENGHT_TO_READ];
116 // First, compare HostByteOrder and NetworkByteOrder in order to
117 // determine if we shall need to swap bytes (i.e. the Endian type).
123 // The easiest case is the one of a DICOM header, since it possesses a
124 // file preamble where it suffices to look for the string "DICM".
// NOTE(review): lgrLue receives the number of bytes actually read; confirm
// that a file shorter than the offsets probed below is handled.
125 lgrLue = fread(deb, 1, HEADER_LENGHT_TO_READ, fp);
// NOTE(review): the assignment positioning entCur inside deb is not visible.
128 if(memcmp(entCur, "DICM", (size_t)4) == 0) {
129 filetype = TrueDicom;
130 dbg.Verbose(1, "gdcmHeader::CheckSwap:", "looks like DICOM Version3");
133 dbg.Verbose(1, "gdcmHeader::CheckSwap:", "not a DICOM Version3 file");
136 if(filetype == TrueDicom) {
137 // Next, determine the value representation (VR). Let's skip to the
138 // first element (0002, 0000) and check there if we find "UL", in
139 // which case we (almost) know it is explicit VR.
140 // WARNING: if it happens to be implicit VR then what we will read
141 // is the length of the group. If the ascii representation of this
142 // length happens to be "UL" then we shall believe it is explicit VR.
143 // FIXME: in order to fix the above warning, we could read the next
144 // element value (or a couple of elements values) in order to make
145 // sure we are not committing a big mistake.
147 // * the 128 bytes of File Preamble (often padded with zeroes),
148 // * the 4 bytes of "DICM" string,
149 // * the 4 bytes of the first tag (0002, 0000),
150 // i.e. a total of 136 bytes.
152 if(memcmp(entCur, "UL", (size_t)2) == 0) {
153 filetype = ExplicitVR;
154 dbg.Verbose(1, "gdcmHeader::CheckSwap:",
155 "explicit Value Representation");
157 filetype = ImplicitVR;
158 dbg.Verbose(1, "gdcmHeader::CheckSwap:",
159 "not an explicit Value Representation");
164 dbg.Verbose(1, "gdcmHeader::CheckSwap:",
165 "HostByteOrder != NetworkByteOrder");
168 dbg.Verbose(1, "gdcmHeader::CheckSwap:",
169 "HostByteOrder = NetworkByteOrder");
172 // Position the file position indicator at first tag (i.e.
173 // after the file preamble and the "DICM" string).
// 132 = 128 bytes of preamble + 4 bytes of magic string.
175 fseek (fp, 132L, SEEK_SET);
177 } // End of TrueDicom
179 // Alas, this is not a DicomV3 file and whatever happens there is no file
180 // preamble. We can reset the file position indicator to where the data
181 // is (i.e. the beginning of the file).
184 // Our next best chance would be to be considering a 'clean' ACR/NEMA file.
185 // By clean we mean that the length of the first tag is written down.
186 // If this is the case and since the length of the first group HAS to be
187 // four (bytes), then determining the proper swap code is straightforward.
// Raw, unswapped 32-bit read of the bytes at entCur.
190 s = str2num(entCur, guint32);
210 dbg.Verbose(0, "gdcmHeader::CheckSwap:",
211 "ACE/NEMA unfound swap info (time to raise bets)");
214 // We are out of luck. It is not a DicomV3 nor a 'clean' ACR/NEMA file.
215 // It is time for despaired wild guesses. So, let's assume this file
216 // happens to be 'dirty' ACR/NEMA, i.e. the length of the group is
217 // not present. Then the only info we have is the net2host one.
218 //FIXME if this is RAW data, it will be thrown out later
// Called by FindLength once a big endian encoded group has been detected.
// Only the log statement is visible in this excerpt; the statements updating
// the swap code are not.
227 void gdcmHeader::SwitchSwapToBigEndian(void) {
// NOTE(review): the log tag names gdcmHeader::FindLength (the caller), not
// this function.
228 dbg.Verbose(0, "gdcmHeader::FindLength", "Switching to BigEndian mode.");
246 * \ingroup gdcmHeader
247 * \brief retrieves the length of a DICOM field.
249 * 1/ the file must already have been opened,
250 * 2/ CheckSwap() must have been called,
251 * 3/ the 'group' part as well as the 'elem' part
252 * of the acr_element must have been read.
254 * ACR-NEMA : we always get
255 * GroupNumber (2 Octets)
256 * ElementNumber (2 Octets)
257 * ElementSize (4 Octets)
258 * DICOM in implicit Value Representation :
259 * GroupNumber (2 Octets)
260 * ElementNumber (2 Octets)
261 * ElementSize (4 Octets)
263 * DICOM in explicit Value Representation :
264 * GroupNumber (2 Octets)
265 * ElementNumber (2 Octets)
266 * ValueRepresentation (2 Octets)
267 * ElementSize (2 Octets)
269 * WARNING : when ValueRepresentation is OB, OW, SQ or UN
270 * GroupNumber (2 Octets)
271 * ElementNumber (2 Octets)
272 * ValueRepresentation (2 Octets)
273 * reserved field (2 Octets)
274 * ElementSize (4 Octets)
276 * @param sw swap code
277 * @param skippedLength pointer to the number of bytes that were skipped once
278 * the reading is finished
279 * @param longueurLue pointer to the length (in number of bytes)
281 * @return the length retained for the field
// Reads the two bytes that should hold the Value Representation of the
// current element when the file is believed to be explicit VR. When those
// bytes do not look like a known VR, the file position is restored and the
// element is flagged as implicit VR.
// ElVal is the element whose tag has just been read.
284 void gdcmHeader::FindVR( ElValue *ElVal) {
// Remembered so that the two bytes can be un-read if they are not a VR.
288 long PositionOnEntry = ftell(fp);
289 // Warning: we believe this is explicit VR (Value Representation) because
290 // we used a heuristic that found "UL" in the first tag. Alas this
291 // doesn't guarantee that all the tags will be in explicit VR. In some
292 // cases (see e-film filtered files) one finds implicit VR tags mixed
293 // within an explicit VR file. Hence we make sure the present tag
294 // is in explicit VR and try to fix things if it happens not to be
296 bool RealExplicit = true;
// NOTE(review): the statement guarded by this test is not visible in this
// excerpt (presumably an early return for non explicit files).
298 if (filetype != ExplicitVR)
// NOTE(review): the result stored in lgrLue is not checked in the visible code.
301 lgrLue=fread (&VR, (size_t)2,(size_t)1, fp);
305 // Assume we are reading a falsely explicit VR file i.e. we reached
306 // a tag where we expect reading a VR but in fact we read the
307 // first two bytes of the length. Then we will interrogate (through find)
308 // the dicom_vr dictionary with oddities like "\004\0" which crashes
309 // both GCC and VC++ implementations of the STL map. Hence when the
310 // expected VR read happens to be non-ascii characters we consider
311 // we hit falsely explicit VR tag.
// NOTE(review): isalpha requires a value representable as unsigned char (or
// EOF); if VR is a plain char buffer, bytes above 0x7f should be cast to
// unsigned char first. Also, with && the tag is only rejected here when BOTH
// bytes are non alphabetic; confirm that this is intended.
313 if ( (!isalpha(VR[0])) && (!isalpha(VR[1])) )
314 RealExplicit = false;
316 // CLEANME searching the dicom_vr at each occurrence is expensive.
317 // Postpone this test in an optional integrity check at the end
318 // of parsing or only in debug mode.
// NOTE(review): this looks vr up, while the bytes were read into VR; the
// declaration of vr is not visible in this excerpt.
319 if ( RealExplicit && !dicom_vr->count(vr) )
320 RealExplicit = false;
322 if ( RealExplicit ) {
// NOTE(review): the statement guarded by this test is not visible here.
323 if ( ElVal->IsVrUnknown() )
328 // We thought this was explicit VR, but we end up with an
329 // implicit VR tag. Let's backtrack.
330 dbg.Verbose(1, "gdcmHeader::FindVR:", "Falsely explicit vr file");
331 fseek(fp, PositionOnEntry, SEEK_SET);
332 // When this element is known in the dictionary we shall use, e.g. for
333 // the semantics (see the usage of IsAnInteger), the vr proposed by the
334 // dictionary entry. Still we have to flag the element as implicit since
335 // we know now our assumption on explicitness is not fulfilled.
337 if ( ElVal->IsVrUnknown() )
338 ElVal->SetVR("Implicit");
339 ElVal->SetImplicitVr();
343 * \ingroup gdcmHeader
344 * \brief Determines if the Transfer Syntax was already encountered
345 * and if it corresponds to a Big Endian one.
347 * @return True when big endian found. False in all other cases.
349 bool gdcmHeader::IsBigEndianTransferSyntax(void) {
// Element (0x0002, 0x0010) is looked up among the already parsed elements.
// NOTE(review): the handling of a missing element is not visible here.
350 ElValue* Element = PubElVals.GetElement(0x0002, 0x0010);
// Load the value through the variant that restores the file position, since
// this is called in the middle of parsing (see FindLength).
353 LoadElementValueSafe(Element);
354 string Transfer = Element->GetValue();
// UID of the DICOM Explicit VR Big Endian transfer syntax.
355 if ( Transfer == "1.2.840.10008.1.2.2" )
// Records on ElVal the length that was read for it, after a last adjustment.
// ElVal is the element being parsed; FoudLength is the length as read from
// the file.
360 void gdcmHeader::FixFoundLength(ElValue * ElVal, guint32 FoudLength) {
361 // Heuristic: a final fix.
// 0xffffffff is the DICOM marker for an undefined length.
// NOTE(review): the statement guarded by this test is not visible here.
362 if ( FoudLength == 0xffffffff)
364 ElVal->SetLength(FoudLength);
// Reads the length field of the element whose tag (and VR, when explicit) has
// just been read, and stores it on ElVal through FixFoundLength.
// Error::FileReadError is thrown by the ReadInt16 / ReadInt32 calls on a
// failed read, and explicitly below when a suspicious group length cannot be
// explained by a big endian transfer syntax.
367 void gdcmHeader::FindLength( ElValue * ElVal) {
368 guint16 element = ElVal->GetElement();
369 string vr = ElVal->GetVR();
372 if ( (filetype == ExplicitVR) && ! ElVal->IsImplicitVr() ) {
373 if ( (vr=="OB") || (vr=="OW") || (vr=="SQ") || (vr=="UN") ) {
375 // The following two bytes are reserved, so we skip them,
376 // and we proceed on reading the length on 4 bytes.
377 fseek(fp, 2L,SEEK_CUR);
378 FixFoundLength(ElVal, ReadInt32());
382 // Length is encoded on 2 bytes.
383 length16 = ReadInt16();
385 // We can tell the current file is encoded in big endian (like
386 // Data/US-RGB-8-epicard) when we find the "Transfer Syntax" tag
387 // and its value is the one of the encoding of a big endian file.
388 // In order to deal with such big endian encoded files, we have
389 // (at least) two strategies:
390 // * when we load the "Transfer Syntax" tag with value of big endian
391 // encoding, we raise the proper flags. Then we wait for the end
392 // of the META group (0x0002) among which is "Transfer Syntax",
393 // before switching the swap code to big endian. We have to postpone
394 // the switching of the swap code since the META group is fully encoded
395 // in little endian, and big endian coding only starts at the next
396 // group. The corresponding code can be hard to analyse and adds
397 // many additional unnecessary tests for regular tags.
398 // * the second strategy consists in waiting for trouble, that shall appear
399 // when we find the first group with big endian encoding. This is
400 // easy to detect since the length of a "Group Length" tag (the
401 // ones with zero as element number) has to be of 4 (0x0004). When we
402 // encounter 1024 (0x0400) chances are the encoding changed and we
403 // found a group with big endian encoding.
404 // We shall use this second strategy. In order to make sure that we
405 // can interpret the presence of an apparently big endian encoded
406 // length of a "Group Length" without committing a big mistake, we
407 // add an additional check: we look in the already parsed elements
408 // for the presence of a "Transfer Syntax" whose value has to be "big
409 // endian encoding". When this is the case, chances are we got our
410 // hands on a big endian encoded file: we switch the swap code to
411 // big endian and proceed...
412 if ( (element == 0) && (length16 == 1024) ) {
413 if ( ! IsBigEndianTransferSyntax() )
414 throw Error::FileReadError(fp, "gdcmHeader::FindLength");
416 SwitchSwapToBigEndian();
417 // Restore the improperly loaded values i.e. the group, the element
418 // and the dictionary entry depending on them.
419 guint16 CorrectGroup = SwapShort(ElVal->GetGroup());
420 guint16 CorrectElem = SwapShort(ElVal->GetElement());
421 gdcmDictEntry * NewTag = IsInDicts(CorrectGroup, CorrectElem);
423 // This correct tag is not in the dictionary. Create a new one.
424 NewTag = new gdcmDictEntry(CorrectGroup, CorrectElem);
426 // FIXME this can create a memory leak on the old entry that may be
427 // left unreferenced.
428 ElVal->SetDictEntry(NewTag);
431 // Heuristic: well some files are really ill-formed.
432 if ( length16 == 0xffff) {
434 dbg.Verbose(0, "gdcmHeader::FindLength",
435 "Erroneous element length fixed.");
437 FixFoundLength(ElVal, (guint32)length16);
441 // Either implicit VR or an explicit VR that (at least for this
442 // element) lied a little bit. Length is on 4 bytes.
443 FixFoundLength(ElVal, ReadInt32());
448 * \ingroup gdcmHeader
449 * \brief Swaps back the bytes of 4-byte long integer accordingly to
452 * @return The suggested integer.
// Reorders the four bytes of a. Three reorderings are visible below; the
// selection between them is on lines not visible in this excerpt.
454 guint32 gdcmHeader::SwapLong(guint32 a) {
455 // FIXME: there could be a problem with negative integers ...
// Full byte reversal: bytes 0123 become 3210.
460 a=( ((a<<24) & 0xff000000) | ((a<<8) & 0x00ff0000) |
461 ((a>>8) & 0x0000ff00) | ((a>>24) & 0x000000ff) );
// Exchange of the two 16-bit halves: bytes 0123 become 2301.
465 a=( ((a<<16) & 0xffff0000) | ((a>>16) & 0x0000ffff) );
// Exchange of the two bytes inside each 16-bit half: bytes 0123 become 1032.
469 a=( ((a<<8) & 0xff00ff00) | ((a>>8) & 0x00ff00ff) );
472 dbg.Error(" gdcmHeader::SwapLong : unset swap code");
479 * \ingroup gdcmHeader
480 * \brief Swaps the bytes so they agree with the processor order
481 * @return The properly swapped 16-bit integer.
483 guint16 gdcmHeader::SwapShort(guint16 a) {
// Only the swap codes 4321 and 2143 require the two bytes of a 16-bit value
// to be exchanged; for any other code a is left untouched by this statement.
484 if ( (sw==4321) || (sw==2143) )
485 a =(((a<<8) & 0x0ff00) | ((a>>8)&0x00ff));
// Moves the file position forward by the length recorded on ElVal, i.e.
// steps over the value of the element without reading it.
489 void gdcmHeader::SkipElementValue(ElValue * ElVal) {
490 //FIXME don't dump the returned value
// The status returned by fseek is deliberately discarded.
491 (void)fseek(fp, (long)ElVal->GetLength(), SEEK_CUR);
495 * \ingroup gdcmHeader
496 * \brief Loads the element if its size is not too big.
497 * @param ElVal Element whose value shall be loaded.
498 * @param MaxSize Size threshold above which the element value is not
499 * loaded in memory. The element value is always loaded
500 * when MaxSize is equal to UINT32_MAX.
// Seeks to the offset recorded on ElVal and stores the element value on it:
// integers are read with ReadInt16 / ReadInt32 and stored as text, other
// values are read as raw bytes. Some elements are skipped and get a
// placeholder value instead. The file position is modified.
503 void gdcmHeader::LoadElementValue(ElValue * ElVal) {
505 guint16 group = ElVal->GetGroup();
506 guint16 elem = ElVal->GetElement();
507 string vr = ElVal->GetVR();
508 guint32 length = ElVal->GetLength();
509 bool SkipLoad = false;
// Go to where the value of this element starts.
511 fseek(fp, (long)ElVal->GetOffset(), SEEK_SET);
513 // Sequences not treated yet !
517 // Heuristic : a sequence "contains" a set of tags (called items). It looks
518 // like the last tag of a sequence (the one that terminates the sequence)
519 // has a group of 0xfffe (with a dummy length).
520 if( group == 0xfffe )
523 // The group length doesn't represent data to be loaded in memory, since
524 // each element of the group shall be loaded individually.
529 SkipElementValue(ElVal);
// Placeholder value marking an element whose value was not loaded.
531 ElVal->SetValue("gdcm::Skipped");
535 // When the length is zero things are easy:
541 // When an integer is expected, read and convert the following two or
542 // four bytes properly i.e. as an integer as opposed to a string.
543 if ( IsAnInteger(ElVal) ) {
546 NewInt = ReadInt16();
547 } else if( length == 4 ) {
548 NewInt = ReadInt32();
550 dbg.Error(true, "LoadElementValue: Inconsistency when reading Int.");
552 //FIXME: make the following a utility function
555 ElVal->SetValue(s.str());
559 // FIXME The exact size should be length if we move to strings or whatever
// NOTE(review): length is a guint32, so length+1 wraps to 0 when length is
// 0xffffffff. The release of NewValue is not visible in this excerpt; confirm
// the buffer is terminated and freed.
560 char* NewValue = (char*)g_malloc(length+1);
562 dbg.Verbose(1, "LoadElementValue: Failed to allocate NewValue");
567 // FIXME elements that are too long (threshold to be set by hand) should
568 // not be loaded !!!! See TODO.
569 item_read = fread(NewValue, (size_t)length, (size_t)1, fp);
570 if ( item_read != 1 ) {
// A temporary FileReadError is built for its logging side effect only; it is
// not thrown here.
572 Error::FileReadError(fp, "gdcmHeader::LoadElementValue");
// Placeholder value marking an element whose value could not be read.
573 ElVal->SetValue("gdcm::UnRead");
576 ElVal->SetValue(NewValue);
580 * \ingroup gdcmHeader
581 * \brief Loads the element while preserving the current
582 * underlying file position indicator as opposed to
583 * LoadElementValue that modifies it.
584 * @param ElVal Element whose value shall be loaded.
587 void gdcmHeader::LoadElementValueSafe(ElValue * ElVal) {
// Save the current file position, load the value (which seeks and reads),
// then put the position back where it was.
// NOTE(review): an exception escaping LoadElementValue would skip the
// restoring fseek.
588 long PositionOnEntry = ftell(fp);
589 LoadElementValue(ElVal);
590 fseek(fp, PositionOnEntry, SEEK_SET);
// Reads the next two bytes of the file as a 16-bit integer.
// Throws Error::FileReadError when the two bytes cannot be read.
// NOTE(review): the statements producing the returned value (presumably a
// byte swap of g) are not visible in this excerpt.
594 guint16 gdcmHeader::ReadInt16(void) {
// One item of 2 bytes is requested, so fread returns 1 on success.
597 item_read = fread (&g, (size_t)2,(size_t)1, fp);
598 if ( item_read != 1 )
599 throw Error::FileReadError(fp, "gdcmHeader::ReadInt16");
// Reads the next four bytes of the file as a 32-bit integer.
// Throws Error::FileReadError when the four bytes cannot be read.
// NOTE(review): the statements producing the returned value (presumably a
// byte swap of g) are not visible in this excerpt.
604 guint32 gdcmHeader::ReadInt32(void) {
// One item of 4 bytes is requested, so fread returns 1 on success.
607 item_read = fread (&g, (size_t)4,(size_t)1, fp);
608 if ( item_read != 1 )
609 throw Error::FileReadError(fp, "gdcmHeader::ReadInt32");
615 * \ingroup gdcmHeader
616 * \brief Read the next tag without loading its value
617 * @return On success the newly created ElValue, NULL on failure.
620 ElValue * gdcmHeader::ReadNextElement(void) {
// NOTE(review): the try block reading the group g and element n is not
// visible in this excerpt. The exception is caught by value; catching by
// const reference would avoid a copy.
629 catch ( Error::FileReadError ) {
630 // We reached the EOF (or an error occurred) and header parsing
631 // has to be considered as finished.
635 // Find out if the tag we encountered is in the dictionaries:
636 gdcmDictEntry * NewTag = IsInDicts(g, n);
// Fallback entry built from the raw group and element numbers.
// NOTE(review): the test guarding this line is not visible in this excerpt.
638 NewTag = new gdcmDictEntry(g, n);
640 NewElVal = new ElValue(NewTag);
642 dbg.Verbose(1, "ReadNextElement: failed to allocate ElValue");
647 try { FindLength(NewElVal); }
648 catch ( Error::FileReadError ) { // Call it quits
// The file position after the length field is recorded as the offset of the
// element; LoadElementValue later seeks back to it.
651 NewElVal->SetOffset(ftell(fp));
// Tells whether the value of ElVal is to be read as a binary integer rather
// than as a string (see its use in LoadElementValue). The return statements
// attached to the tests below are not visible in this excerpt.
655 bool gdcmHeader::IsAnInteger(ElValue * ElVal) {
656 guint16 group = ElVal->GetGroup();
657 guint16 element = ElVal->GetElement();
658 string vr = ElVal->GetVR();
659 guint32 length = ElVal->GetLength();
661 // When we have some semantics on the element we just read, and if we
662 // a priori know we are dealing with an integer, then we shall be
663 // able to swap its element value properly.
664 if ( element == 0 ) { // This is the group length of the group
668 dbg.Error("gdcmHeader::IsAnInteger",
669 "Erroneous Group Length element length.");
// Odd group numbers are tested here.
// NOTE(review): the rest of the comment below and the guarded statement are
// not visible in this excerpt.
672 if ( group % 2 != 0 )
673 // We only have some semantics on documented elements, which are
677 if ( (length != 4) && ( length != 2) )
678 // Swapping only makes sense on integers which are 2 or 4 bytes long.
// Value representations compared here: UL, US, SL and SS.
681 if ( (vr == "UL") || (vr == "US") || (vr == "SL") || (vr == "SS") )
684 if ( (group == 0x0028) && (element == 0x0005) )
685 // This tag is retained from ACR/NEMA
686 // CHECKME Why should "Image Dimensions" be a single integer ?
689 if ( (group == 0x0028) && (element == 0x0200) )
690 // This tag is retained from ACR/NEMA
697 * \ingroup gdcmHeader
698 * \brief Recover the offset (from the beginning of the file) of the pixels.
700 size_t gdcmHeader::GetPixelOffset(void) {
701 // If this file complies with the norm we should encounter the
702 // "Image Location" tag (0x0028, 0x0200). This tag contains
703 // the group that contains the pixel data (hence the "Pixel Data"
704 // is found by indirection through the "Image Location").
705 // Inside the group pointed by "Image Location" the searched element
706 // is conventionally the element 0x0010 (when the norm is respected).
707 // When the "Image Location" is absent we default to group 0x7fe0.
710 string ImageLocation = GetPubElValByName("Image Location");
// The string UNFOUND is what the look-up is compared against to detect a
// missing tag.
711 if ( ImageLocation == "UNFOUND" ) {
// NOTE(review): atoi parses decimal text; confirm the stored value is not
// written in hexadecimal.
714 grPixel = (guint16) atoi( ImageLocation.c_str() );
716 if (grPixel != 0x7fe0)
717 // FIXME is this still necessary ?
718 // Now, this looks like an old dirty fix for Philips imager
722 ElValue* PixelElement = PubElVals.GetElement(grPixel, numPixel);
// NOTE(review): the handling of a null PixelElement is not visible here.
724 return PixelElement->GetOffset();
// Looks the tag (group, element) up in the dictionaries attached to this
// header, RefPubDict and RefShaDict. found starts out null.
// NOTE(review): the tests guarding the two look-ups and the return statement
// are not visible in this excerpt.
729 gdcmDictEntry * gdcmHeader::IsInDicts(guint32 group, guint32 element) {
730 gdcmDictEntry * found = (gdcmDictEntry*)0;
// Neither dictionary is available.
731 if (!RefPubDict && !RefShaDict) {
732 //FIXME build a default dictionary !
733 printf("FIXME in gdcmHeader::IsInDicts\n");
736 found = RefPubDict->GetTag(group, element);
741 found = RefShaDict->GetTag(group, element);
// Returns the value stored in PubElVals for the tag (group, element); the
// look-up is delegated to PubElVals.GetElValue.
748 string gdcmHeader::GetPubElValByNumber(guint16 group, guint16 element) {
749 return PubElVals.GetElValue(group, element);
// Returns the value stored in PubElVals for the tag called TagName; the
// look-up is delegated to PubElVals.GetElValue. GetPixelOffset compares the
// result with the string UNFOUND to detect a missing tag.
752 string gdcmHeader::GetPubElValByName(string TagName) {
753 return PubElVals.GetElValue(TagName);
757 * \ingroup gdcmHeader
758 * \brief Parses the header of the file but does NOT load element values.
760 void gdcmHeader::ParseHeader(void) {
761 ElValue * newElValue = (ElValue *)0;
// Read tags one after the other until ReadNextElement returns null. Each
// value is stepped over in the file and the element is added to PubElVals.
765 while ( (newElValue = ReadNextElement()) ) {
766 SkipElementValue(newElValue);
767 PubElVals.Add(newElValue);
772 * \ingroup gdcmHeader
773 * \brief Loads the element values of all the elements present in the
774 * public tag based hash table.
776 void gdcmHeader::LoadElements(void) {
// NOTE(review): ht is declared by value, so when GetTagHt returns a reference
// the whole table is copied here; a reference would avoid that copy.
778 TagElValueHT ht = PubElVals.GetTagHt();
// tag->second is the ElValue pointer stored for each entry of the table.
779 for (TagElValueHT::iterator tag = ht.begin(); tag != ht.end(); ++tag)
780 LoadElementValue(tag->second);
// Takes the output stream os to write to.
// NOTE(review): the body is not visible in this excerpt; presumably it
// prints the content of PubElVals.
783 void gdcmHeader::PrintPubElVal(ostream & os) {
// Prints the public dictionary RefPubDict on the stream os.
// NOTE(review): RefPubDict is dereferenced without a null check.
787 void gdcmHeader::PrintPubDict(ostream & os) {
788 RefPubDict->Print(os);