#!/usr/bin/perl
# med2ace.pl
# Converts a medline file to a .ace file for the MalDB database.
# The medline fields used are signified by: -
#   UI - A medline record number. Should be first field entry
#        because this program uses it to separate records and
#        initiate translating last record from medline format
#        to .ace format.
#   TI - Title of the paper.
#   AU - Author
#   SO - Journal, year, volume, and page.
#   AB - Abstract of paper
# All other fields are ignored and, apart from UI, the fields
# may appear in any order.

my $USAGE = "usage: med2ace.pl [-D] <medline file>\n"
    . "if -D is presented, the ace file created will have \"-D : id\" line proceeded to each object \": id\", as the \"-D\" flag of ACeDB; use - for standard input; the default output is on the screen; options must be given in the order shown above\n";

@ARGV or die $USAGE;

# If -D option used, set $VAR_D and drop the option from @ARGV.
if ($ARGV[0] eq '-D') {
    $VAR_D = 1;
    shift @ARGV;
}
else {
    $VAR_D = 0;
}

# A file name (or "-") must still be present once -D has been consumed.
@ARGV or die $USAGE;

# Open medline file.  "-" selects standard input, as promised by the usage
# text; three-argument open does not treat "-" specially, so handle it here.
my $in;
if ($ARGV[0] eq '-') {
    $in = \*STDIN;
}
else {
    open($in, '<', $ARGV[0])
        or die "med2ace.pl: cannot open '$ARGV[0]': $!\n";
}

# Initialise fields to "".
init();

# Ignore first UI.
$start = 1;

while (<$in>) {
    # chomp (not chop) so a final line without a newline keeps its last
    # character.
    chomp;

    # If line is not the beginning of a new field,
    # append line to current field,
    # else check if previous field is relevant to .ace output file.
    if ($_ !~ /^[A-Z]/) {
        # NOTE(review): only one leading space is removed here; confirm this
        # matches the indentation width of continuation lines in the input.
        $_ =~ s/^ //;
        $section = $section . "\n" . $_;
    }
    else {
        # If previous field is AU, store author in @auth array.
        if ($section =~ /^AU/) {
            $author = $section;
            author();
        }

        # Store Title field.
        if ($section =~ /^TI/) {
            $title = $section;
        }

        # Store Journal field.
        if ($section =~ /^SO/) {
            $journalstats = $section;
        }

        # Store Abstract field.
        if ($section =~ /^AB/) {
            $abstract = $section;
        }

        # If line is beginning of a new record (but not 1st record),
        # extract .ace file data from stored medline data
        # and print to stdout in .ace format.
        # \s+ accepts both "UI - " and the padded "UI  - " tag layout.
        if (($_ =~ /^UI\s+- /) && ($start == 0)) {
            title();
            journalstats();
            abstract();
            printout();
            init();
        }
        else {
            if ($start == 1) {
                # First field line of the file: nothing to flush yet.
                $start = 0;
            }
            else {
                $section = $_;
            }
        }
    }
}

close($in) unless $ARGV[0] eq '-';

# Repeat process for final record.
# The read loop only flushes a field when the NEXT field line arrives, so the
# field still held in $section at end of input is dispatched here, followed by
# the same extract-and-print sequence used for every earlier record.
if ($section =~ /^AU/) {
    $author = $section;
    author();
}
if ($section =~ /^TI/) {
    $title = $section;
}
if ($section =~ /^SO/) {
    $journalstats = $section;
}
if ($section =~ /^AB/) {
    $abstract = $section;
}
title();
journalstats();
abstract();
printout();

# Initialise variables for storing relevant medline data.
# Resets the raw field buffers, the author count $n and every formatted
# output variable to "" / 0.  @auth itself is not emptied; $n is what bounds
# the authors belonging to the current record.
sub init {
    $title        = "";
    $author       = "";
    $journalstats = "";
    $abstract     = "";
    $section      = "";
    $n            = 0;
    $Pap          = "";
    $tit          = "";
    $Jour         = "";
    $Year         = "";
    $Vol          = "";
    $Pag          = "";
    $Abs          = "";
    $LongText     = "";
}

# Store author in @auth array.
# Reads the raw AU field from $author, turns it into an
# 'Author "<name>"' line in $auth[$n], then advances $n and clears $section.
sub author {
    $author =~ s/\n//g;
    $auth[$n] = $author;
    # Same treatment as title and journal: an embedded double quote would end
    # the quoted .ace string early.
    $auth[$n] =~ s/"/'/g;
    # \s+ accepts both "AU - " and the padded "AU  - " tag layout.
    $auth[$n] =~ s/^AU\s+- /Author "/;
    $auth[$n] .= "\"\n";
    $n++;
    $section = "";
}

# Store title in $tit.
# Reads the raw TI field from $title, joins its lines, replaces double quotes
# with single quotes and produces a 'Title "<text>"' line.
sub title {
    $title =~ s/\n//g;
    $tit = $title;
    $tit =~ s/"/'/g;
    $tit =~ s/^TI\s+- /Title "/;
    $tit .= "\"\n";
    $section = "";
}

# Store journal, year, volume and page no. in $Jour, $Year, $Vol, and $Pag
# and define the paper as the concatenation of these variables, for the
# .ace Paper entry.
sub journalstats {
    $journalstats =~ s/\n//g;

    # Put a ":" in front of the publication year so that journal name, date,
    # volume and pages can all be split on ":".  Four-digit 19xx and 20xx
    # years are recognised; a bare " 19" test would miss 20xx and could hit
    # a day-of-month such as "Mar 19".
    $journalstats =~ s/ (?=(?:19|20)\d\d(?!\d))/:/;
    $journalstats =~ s/;/:/;
    ($Jour, $Year, $Vol, $Pag) = split(/:/, $journalstats);

    # The Paper object name uses the unformatted pieces.
    $Pap = "Paper : \"" . $Jour . "_" . $Year . "_" . $Vol . "_" . $Pag . "\"\n";
    $Pap =~ s/^Paper : "SO\s+- /Paper : "/;

    $Jour =~ s/"/'/g;
    $Jour =~ s/^SO\s+- /Journal "/;
    $Jour = $Jour . "\"\n";

    # Keep only the four-digit year.  Stripping every non-digit from a date
    # like "1990 Jan 5" would yield "19905", so capture the year explicitly
    # and fall back to digit-stripping only when no year is recognised.
    if ($Year =~ /^\s*((?:19|20)\d\d)(?!\d)/) {
        $Year = $1;
    }
    else {
        $Year =~ s/\D//g;
    }
    $Year = "Year " . $Year . "\n";

    $Vol =~ s/\s/,/g;
    $Vol = "Volume " . $Vol . "\n";

    $Pag =~ s/\s/,/g;
    $Pag = "Page " . $Pag . "\n";

    $section = "";
}

# Store abstract in $LongText and use $Pap as the header for
# the abstract.
# Must run after journalstats, because both $Abs and the LongText header are
# derived from $Pap.
sub abstract {
    $Abs = $Pap;
    $Abs =~ s/^Paper : /Abstract /;

    # $LongTit is the quoted paper name (with its trailing newline).
    $LongTit = $Abs;
    $LongTit =~ s/^Abstract //;

    $LongText = $abstract;
    $LongText =~ s/^AB\s+- //;
    $LongText = "LongText " . $LongTit . $LongText . "\n***LongTextEnd***\n";
    # Drop the single space left at the start of continuation lines.
    $LongText =~ s/\n /\n/g;

    $Abs = $Abs . "\n";
    $section = "";
}

# Print record to stdout in .ace format.
# Writes the current record to the selected output handle.
# Order of emission: optional "-D" line (only when $VAR_D is true), the Paper
# line, the first $n entries of @auth, then title, journal, year, volume,
# page, abstract reference and long text, closed by three newlines.
sub printout {
    if ($VAR_D) {
        print "\n-D $Pap\n\n";
    }

    print $Pap;

    # Only the first $n authors belong to this record; later slots may still
    # hold names from an earlier, longer author list.
    print @auth[0 .. $n - 1];
    # @auth = ();

    print $tit, $Jour, $Year, $Vol, $Pag, $Abs, $LongText;
    print "\n\n\n";
}