10 #include <netinet/in.h>
12 #include <cctype> // for isalpha
17 #define HEADER_LENGHT_TO_READ 256 // on ne lit plus que le debut
20 struct FileReadError {
21 FileReadError(FILE* fp, const char* Mesg) {
23 dbg.Verbose(1, "EOF encountered :", Mesg);
25 dbg.Verbose(1, "Error on reading :", Mesg);
30 //FIXME: this looks dirty to me...
31 #define str2num(str, typeNum) *((typeNum *)(str))
33 VRHT * gdcmHeader::dicom_vr = (VRHT*)0;
34 gdcmDictSet* gdcmHeader::Dicts = new gdcmDictSet();
36 void gdcmHeader::Initialise(void) {
37 if (!gdcmHeader::dicom_vr)
39 RefPubDict = gdcmHeader::Dicts->GetDefaultPublicDict();
40 RefShaDict = (gdcmDict*)0;
43 gdcmHeader::gdcmHeader (const char* InFilename) {
44 filename = InFilename;
46 fp=fopen(InFilename,"rw");
47 dbg.Error(!fp, "gdcmHeader::gdcmHeader cannot open file", InFilename);
51 gdcmHeader::~gdcmHeader (void) {
56 void gdcmHeader::InitVRDict (void) {
58 dbg.Verbose(0, "gdcmHeader::InitVRDict:", "VR dictionary allready set");
62 (*vr)["AE"] = "Application Entity"; // 16 car max
63 (*vr)["AS"] = "Age String"; // 4 car fixe
64 (*vr)["AT"] = "Attribute Tag"; // 2 unsigned short int
65 (*vr)["CS"] = "Code String"; // 16 car max
66 (*vr)["DA"] = "Date"; // 8 car fixe
67 (*vr)["DS"] = "Decimal String"; // Decimal codé Binaire 16 max
68 (*vr)["DT"] = "Date Time"; // 26 car max
69 (*vr)["FL"] = "Floating Point Single"; // 4 octets IEEE 754:1985
70 (*vr)["FD"] = "Floating Point Double"; // 8 octets IEEE 754:1985
71 (*vr)["IS"] = "Integer String"; // en format externe 12 max
72 (*vr)["LO"] = "Long String"; // 64 octets max
73 (*vr)["LT"] = "Long Text"; // 10240 max
74 (*vr)["OB"] = "Other Byte String";
75 (*vr)["OW"] = "Other Word String";
76 (*vr)["PN"] = "Person Name";
77 (*vr)["SH"] = "Short String"; // 16 car max
78 (*vr)["SL"] = "Signed Long";
79 (*vr)["SQ"] = "Sequence of Items"; // Not Applicable
80 (*vr)["SS"] = "Signed Short"; // 2 octets
81 (*vr)["ST"] = "Short Text"; // 1024 car max
82 (*vr)["TM"] = "Time"; // 16 car max
83 (*vr)["UI"] = "Unique Identifier"; // 64 car max
84 (*vr)["UN"] = "Unknown";
85 (*vr)["UT"] = "Unlimited Text"; // 2 puissance 32 -1 car max
86 (*vr)["UL"] = "Unsigned Long "; // 4 octets fixe
87 (*vr)["US"] = "Unsigned Short "; // 2 octets fixe
93 * \brief The only reliable way we have to determine
94 * whether we are in LITTLE_ENDIAN, BIG-ENDIAN,
95 * BAD-LITTLE-ENDIAN or BAD-BIG-ENDIAN
96 * is to find the element that gives the length of a 'GROUP'
97 * (we know that the length of this element is 0x00000004)
98 * and to look at how this length is encoded in memory.
100 * The problem is that sometimes there is no such element ...
102 * We then bet that we are dealing with clean LITTLE_ENDIAN
103 * (which has been the standard -not always respected- since ACR-NEMA).
104 * If that is not the case, there is nothing we can do.
106 * (we would need functions to which
107 * the swap code is passed as a parameter, to make 'manual' attempts)
109 void gdcmHeader::CheckSwap()
112 guint32 x=4; // x : pour ntohs
113 bool net2host; // true when HostByteOrder is the same as NetworkByteOrder
117 char deb[HEADER_LENGHT_TO_READ];
119 // First, compare HostByteOrder and NetworkByteOrder in order to
120 // determine if we shall need to swap bytes (i.e. the Endian type).
126 // The easiest case is the one of a DICOM header, since it possesses a
127 // file preamble where it suffices to look for the string "DICM".
128 lgrLue = fread(deb, 1, HEADER_LENGHT_TO_READ, fp);
131 if(memcmp(entCur, "DICM", (size_t)4) == 0) {
132 filetype = TrueDicom;
133 dbg.Verbose(1, "gdcmHeader::CheckSwap:", "looks like DICOM Version3");
136 dbg.Verbose(1, "gdcmHeader::CheckSwap:", "not a DICOM Version3 file");
139 if(filetype == TrueDicom) {
140 // Next, determine the value representation (VR). Let's skip to the
141 // first element (0002, 0000) and check there if we find "UL", in
142 // which case we (almost) know it is explicit VR.
143 // WARNING: if it happens to be implicit VR then what we will read
144 // is the length of the group. If the ascii representation of this
145 // length happens to be "UL" then we shall believe it is explicit VR.
146 // FIXME: in order to fix the above warning, we could read the next
147 // element value (or a couple of elements values) in order to make
148 // sure we are not committing a big mistake.
150 // * the 128 bytes of File Preamble (often padded with zeroes),
151 // * the 4 bytes of "DICM" string,
152 // * the 4 bytes of the first tag (0002, 0000),
153 // i.e. a total of 136 bytes.
155 if(memcmp(entCur, "UL", (size_t)2) == 0) {
156 filetype = ExplicitVR;
157 dbg.Verbose(0, "gdcmHeader::CheckSwap:",
158 "explicit Value Representation");
160 filetype = ImplicitVR;
161 dbg.Verbose(0, "gdcmHeader::CheckSwap:",
162 "not an explicit Value Representation");
167 dbg.Verbose(0, "gdcmHeader::CheckSwap:",
168 "HostByteOrder != NetworkByteOrder");
171 dbg.Verbose(0, "gdcmHeader::CheckSwap:",
172 "HostByteOrder = NetworkByteOrder");
175 // Position the file position indicator at first tag (i.e.
176 // after the file preamble and the "DICM" string).
178 fseek (fp, 132L, SEEK_SET);
180 } // End of TrueDicom
182 // Alas, this is not a DicomV3 file and whatever happens there is no file
183 // preamble. We can reset the file position indicator to where the data
184 // is (i.e. the beginning of the file).
187 // Our next best chance would be to be considering a 'clean' ACR/NEMA file.
188 // By clean we mean that the length of the first tag is written down.
189 // If this is the case and since the length of the first group HAS to be
190 // four (bytes), then determining the proper swap code is straightforward.
193 s = str2num(entCur, int);
213 dbg.Verbose(0, "gdcmHeader::CheckSwap:",
214 "ACE/NEMA unfound swap info (time to raise bets)");
217 // We are out of luck. It is not a DicomV3 nor a 'clean' ACR/NEMA file.
218 // It is time for desperate wild guesses. So, let's assume this file
219 // happens to be 'dirty' ACR/NEMA, i.e. the length of the group is
220 // not present. Then the only info we have is the net2host one.
221 //FIXME If this is RAW, it will get thrown out later
231 * \ingroup gdcmHeader
232 * \brief retrieves the length of a DICOM field.
234 * 1/ the file must already have been opened,
235 * 2/ CheckSwap() must have been called,
236 * 3/ both the 'group' part and the 'elem' part
237 * of the acr_element must have been read.
239 * ACR-NEMA : we always get
240 * GroupNumber (2 Octets)
241 * ElementNumber (2 Octets)
242 * ElementSize (4 Octets)
243 * DICOM in implicit Value Representation :
244 * GroupNumber (2 Octets)
245 * ElementNumber (2 Octets)
246 * ElementSize (4 Octets)
248 * DICOM in explicit Value Representation :
249 * GroupNumber (2 Octets)
250 * ElementNumber (2 Octets)
251 * ValueRepresentation (2 Octets)
252 * ElementSize (2 Octets)
254 * WARNING : when ValueRepresentation = OB, OW, SQ, UN
255 * GroupNumber (2 Octets)
256 * ElementNumber (2 Octets)
257 * ValueRepresentation (2 Octets)
258 * reserved area (2 Octets)
259 * ElementSize (4 Octets)
261 * @param sw swap code
262 * @param skippedLength pointer to the number of bytes skipped once
263 * reading is finished
264 * @param longueurLue pointer to the length (in number of bytes)
266 * @return length retained for the field
269 void gdcmHeader::FindVR( ElValue *ElVal) {
273 long PositionOnEntry = ftell(fp);
274 // Warning: we believe this is explicit VR (Value Representation) because
275 // we used a heuristic that found "UL" in the first tag. Alas this
276 // doesn't guarantee that all the tags will be in explicit VR. In some
277 // cases (see e-film filtered files) one finds implicit VR tags mixed
278 // within an explicit VR file. Hence we make sure the present tag
279 // is in explicit VR and try to fix things if it happens not to be
281 bool RealExplicit = true;
283 if (filetype != ExplicitVR)
286 lgrLue=fread (&VR, (size_t)2,(size_t)1, fp);
290 // Assume we are reading a falsely explicit VR file, i.e. we reached
291 // a tag where we expect to read a VR but in fact we read the
292 // first two bytes of the length. Then we will interrogate (through find)
293 // the dicom_vr dictionary with oddities like "\004\0" which crashes
294 // both GCC and VC++ implementations of the STL map. Hence when the
295 // expected VR read happens to be non-ascii characters we consider
296 // we hit a falsely explicit VR tag.
298 if ( (!isalpha(VR[0])) && (!isalpha(VR[1])) )
299 RealExplicit = false;
301 // CLEANME searching the dicom_vr at each occurrence is expensive.
302 // Postpone this test to an optional integrity check at the end
303 // of parsing or only in debug mode.
304 if ( RealExplicit && !dicom_vr->count(vr) )
305 RealExplicit = false;
307 if ( RealExplicit ) {
312 // We thought this was explicit VR, but we end up with an
313 // implicit VR tag. Let's backtrack.
314 dbg.Verbose(1, "gdcmHeader::FindVR:",
315 "Falsely explicit vr file");
316 ElVal->SetVR("Implicit");
317 fseek(fp, PositionOnEntry, SEEK_SET);
320 void gdcmHeader::FindLength( ElValue * ElVal) {
323 string vr = ElVal->GetVR();
325 if ( (filetype == ExplicitVR) && (vr != "Implicit") ) {
326 if ( (vr=="OB") || (vr=="OW") || (vr=="SQ") || (vr=="UN") ) {
328 // The following two bytes are reserved, so we skip them,
329 // and we proceed on reading the length on 4 bytes.
330 fseek(fp, 2L,SEEK_CUR);
331 length32 = ReadInt32();
334 // Length is encoded on 2 bytes.
335 length16 = ReadInt16();
337 if ( length16 == 0xffff) {
344 // Either implicit VR or an explicit VR that (at least for this
345 // element) lied a little bit. Length is on 4 bytes.
346 length32 = ReadInt32();
349 // Handling of oddities in the length
350 if ( length32 == 0xffffffff)
353 ElVal->SetLength(length32);
358 * \ingroup gdcmHeader
359 * \brief puts the bytes back in an order compatible with the processor's
361 * @return longueur retenue pour le champ
363 guint32 gdcmHeader::SwapLong(guint32 a) {
364 // FIXME: there could be a problem with negative integers ...
369 a=( ((a<<24) & 0xff000000) | ((a<<8) & 0x00ff0000) |
370 ((a>>8) & 0x0000ff00) | ((a>>24) & 0x000000ff) );
374 a=( ((a<<16) & 0xffff0000) | ((a>>16) & 0x0000ffff) );
378 a=( ((a<<8) & 0xff00ff00) | ((a>>8) & 0x00ff00ff) );
381 dbg.Error(" gdcmHeader::SwapLong : unset swap code");
388 * \ingroup gdcmHeader
389 * \brief Swaps the bytes so they agree with the processor order
391 * @return longueur retenue pour le champ
393 guint16 gdcmHeader::SwapShort(guint16 a) {
394 //FIXME how could sw be equal to 2143 since we never set it this way ?
395 if ( (sw==4321) || (sw==2143) )
396 a =(((a<<8) & 0x0ff00) | ((a>>8)&0x00ff));
400 void gdcmHeader::SkipElementValue(ElValue * ElVal) {
401 //FIXME don't dump the returned value
402 (void)fseek(fp, (long)ElVal->GetLength(), SEEK_CUR);
406 * \ingroup gdcmHeader
407 * \brief Loads the element if its size is not too big.
408 * @param ElVal Element whose value shall be loaded.
409 * @param MaxSize Size threshold above which the element value is not
410 * loaded in memory. The element value is always loaded
411 * when MaxSize is equal to UINT32_MAX.
414 void gdcmHeader::LoadElementValue(ElValue * ElVal) {
416 guint16 group = ElVal->GetGroup();
417 guint16 elem = ElVal->GetElement();
418 string vr = ElVal->GetVR();
419 guint32 length = ElVal->GetLength();
420 fseek(fp, (long)ElVal->GetOffset(), SEEK_SET);
422 // Sequences not treated yet !
424 SkipElementValue(ElVal);
428 // A sequence "contains" a set of tags (called items). It looks like
429 // the last tag of a sequence (the one that terminates the sequence)
430 // has a group of 0xfffe (with a dummy length).
431 if( group == 0xfffe) {
432 SkipElementValue(ElVal);
437 if ( IsAnInteger(group, elem, vr, length) ) {
440 NewInt = ReadInt16();
441 } else if( length == 4 ) {
442 NewInt = ReadInt32();
444 dbg.Error(true, "LoadElementValue: Inconsistency when reading Int.");
446 //FIXME: make the following a utility function
449 ElVal->SetValue(s.str());
453 // FIXME The exact size should be length if we move to strings or whatever
454 char* NewValue = (char*)g_malloc(length+1);
456 dbg.Verbose(1, "LoadElementValue: Failed to allocate NewValue");
461 // FIXME elements that are too long (threshold to be set by hand) should
462 // not be loaded !!!! See TODO.
463 item_read = fread(NewValue, (size_t)length, (size_t)1, fp);
464 if ( item_read != 1 ) {
466 Error::FileReadError(fp, "gdcmHeader::LoadElementValue");
467 ElVal->SetValue("gdcm::UnRead");
470 ElVal->SetValue(NewValue);
474 guint16 gdcmHeader::ReadInt16(void) {
477 item_read = fread (&g, (size_t)2,(size_t)1, fp);
478 if ( item_read != 1 )
479 throw Error::FileReadError(fp, "gdcmHeader::ReadInt16");
484 guint32 gdcmHeader::ReadInt32(void) {
487 item_read = fread (&g, (size_t)4,(size_t)1, fp);
488 if ( item_read != 1 )
489 throw Error::FileReadError(fp, "gdcmHeader::ReadInt32");
495 * \ingroup gdcmHeader
496 * \brief Read the next tag without loading its value
497 * @return On success the newly created ElValue, NULL on failure.
500 ElValue * gdcmHeader::ReadNextElement(void) {
509 catch ( Error::FileReadError ) {
510 // We reached the EOF (or an error occurred) and header parsing
511 // has to be considered as finished.
515 // Find out if the tag we encountered is in the dictionaries:
516 gdcmDictEntry * NewTag = IsInDicts(g, n);
518 NewTag = new gdcmDictEntry(g, n, "Unknown", "Unknown", "Unknown");
520 NewElVal = new ElValue(NewTag);
522 dbg.Verbose(1, "ReadNextElement: failed to allocate ElValue");
527 FindLength(NewElVal);
528 NewElVal->SetOffset(ftell(fp));
532 bool gdcmHeader::IsAnInteger(guint16 group, guint16 element,
533 string vr, guint32 length ) {
534 // When we have some semantics on the element we just read, and we
535 // a priori know we are dealing with an integer, then we can swap its
536 // element value properly.
537 if ( element == 0 ) { // This is the group length of the group
539 dbg.Error("gdcmHeader::ShouldBeSwaped", "should be four");
543 if ( group % 2 != 0 )
544 // We only have some semantics on documented elements, which are
548 if ( (length != 4) && ( length != 2) )
549 // Swapping only makes sense on integers which are 2 or 4 bytes long.
552 if ( (vr == "UL") || (vr == "US") || (vr == "SL") || (vr == "SS") )
555 if ( (group == 0x0028) && (element == 0x0005) )
556 // This tag is retained from ACR/NEMA
557 // CHECKME Why should "Image Dimensions" be a single integer ?
560 if ( (group == 0x0028) && (element == 0x0200) )
561 // This tag is retained from ACR/NEMA
568 * \ingroup gdcmHeader
569 * \brief Recover the offset (from the beginning of the file) of the pixels.
571 size_t gdcmHeader::GetPixelOffset(void) {
572 // If this file complies with the norm we should encounter the
573 // "Image Location" tag (0x0028, 0x0200). This tag contains the
574 // group that contains the pixel data (hence the "Pixel Data"
575 // is found by indirection through the "Image Location").
576 // Inside the group pointed by "Image Location" the searched element
577 // is conventionally the element 0x0010 (when the norm is respected).
578 // When the "Image Location" is absent we default to group 0x7fe0.
581 string ImageLocation = GetPubElValByName("Image Location");
582 if ( ImageLocation == "UNFOUND" ) {
585 grPixel = (guint16) atoi( ImageLocation.c_str() );
587 if (grPixel != 0x7fe0)
588 // FIXME is this still necessary ?
589 // Now, this looks like an old dirty fix for Philips imager
593 ElValue* PixelElement = PubElVals.GetElement(grPixel, numPixel);
595 return PixelElement->GetOffset();
600 gdcmDictEntry * gdcmHeader::IsInDicts(guint32 group, guint32 element) {
601 gdcmDictEntry * found = (gdcmDictEntry*)0;
602 if (!RefPubDict && !RefShaDict) {
603 //FIXME build a default dictionary !
604 printf("FIXME in gdcmHeader::IsInDicts\n");
607 found = RefPubDict->GetTag(group, element);
612 found = RefShaDict->GetTag(group, element);
619 string gdcmHeader::GetPubElValByNumber(guint16 group, guint16 element) {
620 return PubElVals.GetElValue(group, element);
623 string gdcmHeader::GetPubElValByName(string TagName) {
624 return PubElVals.GetElValue(TagName);
628 * \ingroup gdcmHeader
629 * \brief Parses the header of the file but does NOT load element values.
631 void gdcmHeader::ParseHeader(void) {
632 ElValue * newElValue = (ElValue *)0;
636 while ( (newElValue = ReadNextElement()) ) {
637 SkipElementValue(newElValue);
638 PubElVals.Add(newElValue);
643 * \ingroup gdcmHeader
644 * \brief Loads the element values of all the elements present in the
645 * public tag based hash table.
647 void gdcmHeader::LoadElements(void) {
649 TagElValueHT ht = PubElVals.GetTagHt();
650 for (TagElValueHT::iterator tag = ht.begin(); tag != ht.end(); ++tag)
651 LoadElementValue(tag->second);
654 void gdcmHeader::PrintPubElVal(ostream & os) {
658 void gdcmHeader::PrintPubDict(ostream & os) {
659 RefPubDict->Print(os);