Creatis software - gdcm.git/blob - Example/SplitIntoDirectories.cxx
Allow user to give a 'lexicographical compliant' name to the files
[gdcm.git] / Example / SplitIntoDirectories.cxx
1 /*=========================================================================
2
3   Program:   gdcm
4   Module:    $RCSfile: SplitIntoDirectories.cxx,v $
5   Language:  C++
6   Date:      $Date: 2007/09/26 16:19:54 $
7   Version:   $Revision: 1.2 $
8                                                                                 
9   Copyright (c) CREATIS (Centre de Recherche et d'Applications en Traitement de
10   l'Image). All rights reserved. See Doc/License.txt or
11   http://www.creatis.insa-lyon.fr/Public/Gdcm/License.html for details.
12                  
13      This software is distributed WITHOUT ANY WARRANTY; without even
14      the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15      PURPOSE.  See the above copyright notices for more information.
16                                                                                 
17 =========================================================================*/
18 #include "gdcmDocEntry.h"
19 #include "gdcmDicomDir.h"
20 #include "gdcmDicomDirPatient.h"
21 #include "gdcmFile.h"
22 #include "gdcmFileHelper.h"
23 #include "gdcmDirList.h"
24 #include "gdcmDebug.h"
25 #include "gdcmArgMgr.h"
26 #include "gdcmUtil.h"
27 #include "gdcmSerieHelper.h"
28
29 #include <iostream>
30
31 /**
32   * \brief   
33   *          - explores recursively the given directory
34   *          - keeps the requested series
35   *          - orders the gdcm-readable found Files
36   *            according to their Patient/Study/Serie/Image characteristics
37   */  
38
39 typedef std::map<std::string, GDCM_NAME_SPACE::File*> SortedFiles;
40
41 int main(int argc, char *argv[]) 
42 {
43    START_USAGE(usage)
44    " \n SplitIntoDirectories :\n                                              ",
45    " - explores recursively the given directory,                              ",
46    " - keeps the requested series / drops the unrequested series              ",
47    " - orders the gdcm-readable found Files according to their                ",
48    "           (0x0010, 0x0010) Patient's Name                                ",
49    "           (0x0020, 0x000d) Study Instance UID                            ",   
50    "           (0x0020, 0x000e) Series Instance UID                           ",
51    " - fills a tree-like structure of directories as :                        ",
52    "        - Patient                                                         ",
53    "        -- Study                                                          ",
54    "        --- Serie                                                         ",
55    "                                                                          ",
56    " usage:                                                                   ",
57    " -----                                                                    ",
58    " SplitIntoDirectories                                                     ",
59    "                  dirin=rootDirectoryName                                 ",
60    "                  dirout=outputDirectoryName                              ",
61    "                  {  [keep= list of seriesNumber to process]              ",
62    "                   | [drop= list of seriesNumber to ignore] }             ",
63    "                  [listonly]  [skel]                                      ",
64    "                  [noshadowseq][noshadow][noseq] [verbose] [debug]        ",
65    "                                                                          ",
66    " dirout : will be created if doesn't exist                                ",
67    " keep : if user wants to process a limited number of series               ",
68    "            he gives the list of 'SeriesNumber' (tag 0020|0011)           ",
69    " drop : if user wants to ignore a limited number of series                ",
70    "            he gives the list of 'SeriesNumber' (tag 0020|0011)           ",
71    "        SeriesNumber are short enough to be human readable                ",
72    "        e.g : 1030,1035,1043                                              ",
73    " skel : name skeleton eg : patName_1.nema -> skel=patName_                ",
74    " noshadowseq: user doesn't want to load Private Sequences                 ",
75    " noshadow : user doesn't want to load Private groups (odd number)         ",
76    " noseq    : user doesn't want to load Sequences                           ",
77    " verbose  : user wants to run the program in 'verbose mode'               ",
78    " debug    : *developer*  wants to run the program in 'debug mode'         ",
79    FINISH_USAGE
80
81
82    enum Index
83    {
84       IND_PatientName,
85       IND_StudyInstanceUID,
86       IND_SerieInstanceUID,
87       IND_FileName       
88    };
89       
90    std::cout << "... inside " << argv[0] << std::endl;
91    
92    // ----- Initialize Arguments Manager ------
93       
94    GDCM_NAME_SPACE::ArgMgr *am = new GDCM_NAME_SPACE::ArgMgr(argc, argv);
95   
96    if (argc == 1 || am->ArgMgrDefined("usage")) 
97    {
98       am->ArgMgrUsage(usage); // Display 'usage'
99       delete am;
100       return 0;
101    }
102
103    const char *dirNamein;   
104    dirNamein  = am->ArgMgrGetString("dirin","."); 
105
106    const char *dirNameout;   
107    dirNameout  = am->ArgMgrGetString("dirout",".");  
108    
109    int loadMode = GDCM_NAME_SPACE::LD_ALL;
110    if ( am->ArgMgrDefined("noshadowseq") )
111       loadMode |= GDCM_NAME_SPACE::LD_NOSHADOWSEQ;
112    else 
113    {
114    if ( am->ArgMgrDefined("noshadow") )
115          loadMode |= GDCM_NAME_SPACE::LD_NOSHADOW;
116       if ( am->ArgMgrDefined("noseq") )
117          loadMode |= GDCM_NAME_SPACE::LD_NOSEQ;
118    }
119
120    if (am->ArgMgrDefined("debug"))
121       GDCM_NAME_SPACE::Debug::DebugOn();
122
123    bool verbose  = ( 0 != am->ArgMgrDefined("verbose") );
124    bool listonly = ( 0 != am->ArgMgrDefined("listonly") );
125            
126    int nbSeriesToKeep;
127    int *seriesToKeep = am->ArgMgrGetListOfInt("keep", &nbSeriesToKeep);
128    int nbSeriesToDrop;
129    int *seriesToDrop = am->ArgMgrGetListOfInt("drop", &nbSeriesToDrop);
130  
131    if ( nbSeriesToKeep!=0 && nbSeriesToDrop!=0)
132    {
133       std::cout << "KEEP and DROP are mutually exclusive !" << std::endl;
134       delete am;
135       return 0;         
136    }
137
138    bool hasSkel = ( 0 != am->ArgMgrDefined("hasSkel") );    
139    const char *skel;
140    if (hasSkel)
141       skel = am->ArgMgrGetString("skel");   
142       
143       
144    const char *input   = am->ArgMgrGetString("input","DCM");
145    
146    // if unused Param we give up
147    if ( am->ArgMgrPrintUnusedLabels() )
148    { 
149       am->ArgMgrUsage(usage);
150       delete am;
151       return 0;
152    }
153    delete am;  // we don't need Argument Manager any longer
154
155    // ----- Begin Processing -----
156    
157      
158    // --> Check supposed-to-be-directory names
159    
160    if ( ! GDCM_NAME_SPACE::DirList::IsDirectory(dirNamein) )
161    {
162       std::cout << "KO : [" << dirNamein << "] is not a Directory."
163                 << std::endl;
164       return 0;
165
166    }
167    else
168    {
169       std::cout << "OK : [" << dirNamein << "] is a Directory." << std::endl;
170    }
171
172    std::string systemCommand;
173    
174    std::cout << "Check for output directory :[" << dirNameout << "]."
175              <<std::endl;
176    if ( ! GDCM_NAME_SPACE::DirList::IsDirectory(dirNameout) )    // dirout not found
177    {
178       std::string strDirNameout(dirNameout);          // to please gcc 4
179       systemCommand = "mkdir " +strDirNameout;        // create it!
180       if (verbose)
181          std::cout << systemCommand << std::endl;
182       system (systemCommand.c_str());
183       if ( ! GDCM_NAME_SPACE::DirList::IsDirectory(dirNameout) ) // be sure it worked
184       {
185           std::cout << "KO : not a dir : [" << dirNameout << "] (creation failure ?)" 
186                     << std::endl;
187       return 0;
188
189       }
190       else
191       {
192         std::cout << "Directory [" << dirNameout << "] created." << std::endl;
193       }
194    }
195    else
196    {
197        std::cout << "Output Directory [" << dirNameout 
198                  << "] already exists; Used as is."
199                  << std::endl;
200    }
201    // --> End of checking supposed-to-be-directory names
202        
203    std::string strDirNamein(dirNamein);
204    // true ; get recursively the list of files
205    GDCM_NAME_SPACE::DirList dirList(strDirNamein, true); 
206    
207    if (listonly)
208    {
209       std::cout << "------------List of found files ------------" << std::endl;
210       dirList.Print();
211       std::cout << std::endl;
212    }
213
214
215 // ======================================= The job starts here =========================
216    
217    GDCM_NAME_SPACE::DirListType fileNames;
218    fileNames = dirList.GetFilenames();
219
220    GDCM_NAME_SPACE::SerieHelper *s;     // Needed to use SerieHelper::AddSeriesDetail()
221    s = GDCM_NAME_SPACE::SerieHelper::New();
222
223    std::string token = "%%%"; // Hope it's enough!
224   
225    GDCM_NAME_SPACE::File *f;
226    std::vector<std::string> tokens;
227    std::vector<std::string> tokensForFileName;
228    
229    if (verbose)
230       std::cout << "------------------Print Break levels-----------------" << std::endl;
231
232    std::string userFileIdentifier;
233    SortedFiles sf;
234  
235    s->AddSeriesDetail(0x0010, 0x0010, false); // Patient's Name (false : no convert)
236    
237    // You may prefer 0020 0010  Study ID
238    // use :
239    // s->AddSeriesDetail(0x0020, 0x0010, true); 
240    // Avoid using 0008 0020 Study Date, 
241    // since you may have more than one study, for a given Patient, at a given Date!
242    // or the field may be empty!   
243    s->AddSeriesDetail(0x0020, 0x000d, false); // Study Instance UID (false : no convert)
244
245
246    // You may prefer 0020 0011 Series Number
247    // use :
248    // s->AddSeriesDetail(0x0020, 0x0011, true);    
249    s->AddSeriesDetail(0x0020, 0x000e, false); // Series Instance UID (false : no convert)
250    
251    // Feel free to add more fields, if they can help a suitable (for you)
252    // image sorting
253
254 // Loop on all the gdcm-readable files
255    for (GDCM_NAME_SPACE::DirListType::iterator it = fileNames.begin();
256                                     it != fileNames.end();
257                                   ++it)
258    {
259       f = GDCM_NAME_SPACE::File::New();
260       f->SetLoadMode(loadMode);
261       f->SetFileName( *it );
262       f->Load();
263
264       std::string strSeriesNumber;
265       int seriesNumber;
266       int j;
267
268       // keep only requested Series
269       bool keep = false;
270       if (nbSeriesToKeep != 0)
271       {
272          strSeriesNumber = f->GetEntryString(0x0020, 0x0011 );
273          seriesNumber = atoi( strSeriesNumber.c_str() );
274          for (j=0; j<nbSeriesToKeep; j++)
275          {
276             if(seriesNumber == seriesToKeep[j])
277             {
278                keep = true;
279                break;
280             }
281          }
282          if ( !keep)
283          {
284             f->Delete();
285             continue;
286          }
287       }
288       // drop all unrequested Series
289       bool drop = false;
290       if (nbSeriesToDrop != 0)
291       {     
292          strSeriesNumber = f->GetEntryString(0x0020, 0x0011 );
293          seriesNumber = atoi( strSeriesNumber.c_str() );
294          for (j=0;j<nbSeriesToDrop; j++)
295          {
296             if(seriesNumber == seriesToDrop[j])
297             { 
298                drop = true;
299                break;
300             }
301         }
302         if (drop)
303         {
304            f->Delete();
305            continue;
306         }
307       }
308
309       userFileIdentifier=s->CreateUserDefinedFileIdentifier(f);
310       tokens.clear();
311       GDCM_NAME_SPACE::Util::Tokenize (userFileIdentifier, tokens, token);
312
313       int imageNum; // Within FileName
314       char newName[1024];
315       
316       ///this is a trick to build up a lexicographical compliant name :
317       ///     eg : fich001.ima vs fich100.ima as opposed to fich1.ima vs fich100.ima
318       std::string name = GDCM_NAME_SPACE::Util::GetName( *it );
319
320       if (hasSkel)
321       {
322          int imageNum; // Within FileName
323          GDCM_NAME_SPACE::Util::Tokenize (name, tokensForFileName, skel);
324          imageNum = atoi ( tokensForFileName[0].c_str() );
325          // probabely we could write something much more complicated using C++ !
326          sprintf (newName, "%s%06d.dcm", skel, imageNum);
327          tokens[IND_FileName] = newName;
328          tokensForFileName.clear();
329        }
330        else
331        {
332          tokens[IND_FileName] = name;
333        }   
334     
335          // Patient's Name
336          // Study Instance UID 
337          // Series Instance UID
338          // file Name
339
340       userFileIdentifier = tokens[IND_PatientName]      + token +
341                            tokens[IND_StudyInstanceUID] + token + 
342                            tokens[IND_SerieInstanceUID] + token +
343                            tokens[IND_FileName] + token;
344          
345       if (verbose) 
346          std::cout << "[" << userFileIdentifier  << "] : " << *it << std::endl;
347                
348       // storing in a map ensures automatic sorting !      
349       sf[userFileIdentifier] = f;
350    }
351    
352    if (verbose)
353       std::cout << "  " << std::endl;
354       
355    std::string fullFilename, lastFilename;
356    std::string previousPatientName, currentPatientName;
357    std::string previousStudyInstanceUID, currentStudyInstanceUID;   
358    std::string previousSerieInstanceUID, currentSerieInstanceUID;
359    
360       
361    std::string writeDir, currentWriteDir;
362    std::string currentPatientWriteDir;
363    std::string currentStudyWriteDir;
364    std::string currentSerieWriteDir; 
365
366    std::string fullWriteFilename;
367            
368    writeDir = GDCM_NAME_SPACE::Util::NormalizePath(dirNameout);     
369    SortedFiles::iterator it2;
370  
371    previousPatientName            = "";
372    previousStudyInstanceUID       = "";    
373    previousSerieInstanceUID       = "";   
374        
375    GDCM_NAME_SPACE::File *currentFile;
376      
377    for (it2 = sf.begin() ; it2 != sf.end(); ++it2)
378    {  
379       currentFile = it2->second;
380        
381       fullFilename =  currentFile->GetFileName();
382       lastFilename =  GDCM_NAME_SPACE::Util::GetName( fullFilename );
383       if (verbose) 
384       std::cout <<" ------------------------------------------------------------------------------" 
385                 << std::endl << " Deal with [" << it2->first << "] : [" <<fullFilename << "]" 
386                 << std::endl;
387      
388       tokens.clear();
389       GDCM_NAME_SPACE::Util::Tokenize (it2->first, tokens, token);
390       
391       currentPatientName            = tokens[IND_PatientName];
392       currentStudyInstanceUID       = tokens[IND_StudyInstanceUID];      
393       currentSerieInstanceUID       = tokens[IND_SerieInstanceUID];
394      
395       if (previousPatientName != currentPatientName)
396       {  
397          previousPatientName = currentPatientName;
398          if (verbose)   
399             std::cout << "==== new Patient  [" << currentPatientName  << "]" << std::endl;
400     
401          previousPatientName            = currentPatientName;
402          previousStudyInstanceUID       = ""; 
403          previousSerieInstanceUID       = "";
404   
405          currentPatientWriteDir = writeDir + currentPatientName;
406
407          systemCommand   = "mkdir " + currentPatientWriteDir;
408          if (verbose)
409             std::cout << systemCommand << std::endl;
410    
411          system ( systemCommand.c_str() );
412       }
413       
414       if (previousStudyInstanceUID != currentStudyInstanceUID)
415       {        
416          previousStudyInstanceUID       = currentStudyInstanceUID;
417          if (verbose)   
418             std::cout << "==== === new Study [" << currentStudyInstanceUID << "]"
419                       << std::endl;      
420
421          currentStudyWriteDir  = currentPatientWriteDir + GDCM_NAME_SPACE::GDCM_FILESEPARATOR
422                              + currentStudyInstanceUID;
423          systemCommand   = "mkdir " + currentStudyWriteDir;  
424          system (systemCommand.c_str());
425
426       }  
427       
428       if (previousSerieInstanceUID != currentSerieInstanceUID)
429       {        
430          previousSerieInstanceUID       = currentSerieInstanceUID;
431          if (verbose)   
432             std::cout << "=== ==== === new Serie [" << currentSerieInstanceUID << "]"
433                       << std::endl;      
434
435          currentSerieWriteDir  = currentStudyWriteDir + GDCM_NAME_SPACE::GDCM_FILESEPARATOR
436                              + currentSerieInstanceUID;
437          systemCommand   = "mkdir " + currentSerieWriteDir;  
438          system (systemCommand.c_str());
439       }            
440    
441       if ( GDCM_NAME_SPACE::Debug::GetDebugFlag())
442          std::cout << "--- --- --- --- --- " << it2->first << "  " 
443                    << (it2->second)->GetFileName() << " " 
444                    << GDCM_NAME_SPACE::Util::GetName( fullFilename ) << std::endl;
445  
446       // If you want to create file names of your own, here is the place!
447       // Just replace 'lastFilename' by anything that's better for you.               
448       fullWriteFilename = currentSerieWriteDir + GDCM_NAME_SPACE::GDCM_FILESEPARATOR 
449                                          + lastFilename; 
450
451       systemCommand   = "cp " + fullFilename + " " + fullWriteFilename;
452       system ( systemCommand.c_str());          
453
454    }
455    return 0;
456  }
457