#!/usr/monash/bin/perl

# embl2ace: read one STS data file and write ACeDB ".ace" objects to
# chr1.ace .. chr14.ace (one file per chromosome) in the current directory.
#
# Usage: embl2ace [-D] <input file>        ("-" reads standard input)
#
# File-level globals set here and used by the rest of the script:
#   $VAR_D        1 when -D was given (emit "-D <class> : id" delete lines)
#   $clone_count  number of STS records stored so far for the grid output
#   INPUT         input filehandle
#   OUTPUT1 .. OUTPUT14   per-chromosome output filehandles

# The usage text describes what the code below actually does: there is no
# -o option, and output always goes to the chrN.ace files.
my $usage =
      "usage: embl2ace [-D] <input file>\n"
    . "  if -D is presented, the ace file created will have a"
    . " \"-D <class> : id\" line preceding each object \"<class> : id\","
    . " as the \"-D\" flag of ACeDB;\n"
    . "  use - for standard input;\n"
    . "  output is written to chr1.ace .. chr14.ace in the current directory\n";

@ARGV or die $usage;

if ($ARGV[0] eq '-D') {
    $VAR_D = 1;
    shift @ARGV;
} else {
    $VAR_D = 0;
}

# "-D" on its own leaves nothing to read.
@ARGV or die $usage;

# Open the input.  Three-argument open keeps characters in the file name
# from being interpreted as an open mode; "-" is handled explicitly because
# three-argument open would treat it as a literal file name.
my $input_file = $ARGV[0];
if ($input_file eq '-') {
    open(INPUT, '<&STDIN')
        or die "embl2ace: cannot read standard input: $!\n";
} else {
    open(INPUT, '<', $input_file)
        or die "embl2ace: cannot open $input_file: $!\n";
}

# Open all files for output.  The handles are created by name (OUTPUT1 ..
# OUTPUT14) because the rest of the script refers to them as barewords.
for my $chromosome_number (1 .. 14) {
    my $handle   = "OUTPUT$chromosome_number";
    my $ace_file = "chr$chromosome_number.ace";
    open($handle, '>', $ace_file)
        or die "embl2ace: cannot create $ace_file: $!\n";
}

$clone_count = 0;    # for the construction of grid data
                     # of positive clones to STS probes.

# Get to first Sequence: discard everything up to and including the first
# line that starts with BOT.
while (<INPUT>) {
    last if /^BOT/;
}

# Extract Sequence information.
# See sub initialise for variable interpretations.
# Main record loop.  Each pass consumes one line in the loop condition and
# then reads the fields of one record in a fixed order.  The input looks
# like a DIF (Data Interchange Format) export -- "1,0" / "0,<n>" header
# lines, BOT / EOD directives -- TODO confirm against the data source.
# Lines read into $dummy are not used, except the last one of each record,
# which is checked for EOD.
SCAN: while (<INPUT>) {
    initialise();

    $STS     = <INPUT>;
    $dummy   = <INPUT>;

    # Primer fields may span several lines; a line starting "1,0" ends them.
    $primer1 = <INPUT>;
    while (<INPUT>) {
        last if /^1,0/;
        $primer1 .= $_;
    }
    $primer2 = <INPUT>;
    while (<INPUT>) {
        last if /^1,0/;
        $primer2 .= $_;
    }

    $locus            = <INPUT>;
    $dummy            = <INPUT>;
    $other_names      = <INPUT>;
    $PCR_product_size = <INPUT>;
    $dummy            = <INPUT>;
    $dummy            = <INPUT>;
    $probe_name       = <INPUT>;
    $dummy            = <INPUT>;

    # The probe sequence runs until the first line starting "0,"; that
    # terminating line is itself the YAC field.
    $probe_sequence = <INPUT>;
    while (<INPUT>) {
        if (/^0,/) {
            $YACs = $_;
            last;
        }
        $probe_sequence .= $_;
    }
    $probe_sequence =~ s/[^catgACTG\n]//g;    # keep bases and newlines only
    $probe_sequence =~ s/^\s*\n//g;
    $probe_sequence =~ s/\n\s*\n/\n/g;

    $dummy      = <INPUT>;
    $chromosome = <INPUT>;
    $chromosome =~ s/^0,//;
    $chromosome =~ s/\s*\n//g;
    $chromosome =~ s/^0+//;                   # "03" -> "3"
    $dummy      = <INPUT>;
    $dummy      = <INPUT>;

    # Comment lines: blank lines are dropped, every kept line gets an ace
    # continuation (" \" before the newline); the last one is undone below.
    while (<INPUT>) {
        last if /^1,0/;
        next if /^\s*\n$/;
        s/\n/ \\\n/g;
        $comments .= $_;
    }
    $comments =~ s/\s*\\\n$/\n/;

    $entry_date      = <INPUT>;
    $dummy           = <INPUT>;
    $mod_date        = <INPUT>;
    $serial_num      = <INPUT>;
    $dummy           = <INPUT>;
    $dummy           = <INPUT>;
    $well_location   = <INPUT>;
    $dummy           = <INPUT>;
    $YAC_pool        = <INPUT>;
    $dummy           = <INPUT>;
    $fragment_locale = <INPUT>;
    $dummy           = <INPUT>;

    $YAC_library = <INPUT>;
    while (<INPUT>) {
        last if /^1,0/;
        $YAC_library .= $_;
    }

    $reaction_cond = <INPUT>;
    $dummy         = <INPUT>;
    $probe_def     = <INPUT>;
    $dummy         = <INPUT>;
    $isolates      = <INPUT>;
    $probe_size    = <INPUT>;
    $dummy         = <INPUT>;
    $dummy         = <INPUT>;
    $status        = <INPUT>;
    $dummy         = <INPUT>;
    $public        = <INPUT>;
    $dummy         = <INPUT>;
    $provisional   = <INPUT>;
    $dummy         = <INPUT>;

    # Approximate locale runs until the first "0," line, which is the
    # YAC insert sizes field.
    while (<INPUT>) {
        if (/^0,/) {
            $YAC_insert_sizes = $_;
            last;
        }
        $approx_locale .= $_;
    }

    $dummy        = <INPUT>;
    $dummy        = <INPUT>;
    $Genbank_ID   = <INPUT>;
    $dummy        = <INPUT>;
    $rows         = <INPUT>;
    $columns      = <INPUT>;
    $dummy        = <INPUT>;
    $dummy        = <INPUT>;
    $Lab_YAC_name = <INPUT>;
    $dummy        = <INPUT>;
    $dummy        = <INPUT>;    # BOT (another record follows) or EOD

    print_out();

    last SCAN if defined($dummy) && $dummy =~ /^EOD/;
}

# All records have been read (EOD seen, or the input ran out): print the
# grids, then close every output file.  Doing this after the loop means the
# grids are still written when the input lacks a final EOD line.
printGrid();
select(STDOUT);
for my $chromosome_number (1 .. 14) {
    my $handle = "OUTPUT$chromosome_number";
    # Buffered write errors only surface at close, so report them.
    close($handle)
        or warn "embl2ace: error closing chr$chromosome_number.ace: $!\n";
}
close(INPUT);

# Set variables' values to empty.
sub initialise {
    $STS              = "";    # STS name.
    $primer1          = "";    # Locus Primer1.
    $primer2          = "";    # Locus Primer2.
    $locus            = "";    # Locus name.
    $other_names      = "";
    $PCR_product_size = "";    # Locus Length.
    $probe_name       = "";
    $probe_sequence   = "";    # DNA sequence.
    $YACs             = "";    # YAC number.
    $chromosome       = "";    # chromosome number.
    $comments         = "";    # Sequence and Locus Remark.
    $entry_date       = "";    # Sequence and Locus Entry_date.
    $mod_date         = "";    # Sequence and Locus Modification_date.
    $serial_num       = "";
    $well_location    = "";
    $YAC_pool         = "";    # Clone In_pool.
    $fragment_locale  = "";
    $YAC_library      = "";    # Clone name = YAC_library + YAC number.
    $reaction_cond    = "";    # Locus React_cond.
    $probe_def        = "";
    $isolates         = "";    # Clone Isolate.
    $probe_size       = "";    # Sequence DNA Int.
    $status           = "";    # Locus Status.
    $public           = "";
    $provisional      = "";    # Locus Provisional.
    $approx_locale    = "";
    $YAC_insert_sizes = "";    # Clone Gel_length.
    $Genbank_ID       = "";
    $rows             = "";
    $columns          = "";
    $Lab_YAC_name     = "";
}

# Make the output file of chromosome CHROM (OUTPUT1 .. OUTPUT14) the default
# handle for print.  Returns 1 when CHROM is exactly "1" .. "14"; otherwise
# returns 0 and leaves the current selection unchanged.
sub select_chromosome_file {
    my ($chrom) = @_;
    return 0 unless defined($chrom) && $chrom =~ /\A(?:[1-9]|1[0-4])\z/;
    my $handle = "OUTPUT$chrom";
    select($handle);
    return 1;
}

# Print out instance of Sequence and
# associated instances of Locus, Clones, STS, Oligos, and Grid_data
# for the record currently held in the globals filled by the SCAN loop.
# Also remembers the record's grid name, chromosome and positive clones
# (@Grid_data, @file_desig, @clone) for printGrid.
# Records whose STS field is just "" are skipped.
sub print_out {
    return if $STS =~ /^\s*""\s*$/;

    # Print to file that sequence is relevant to,
    # that is, the chromosome number.
    # NOTE(review): for an unrecognised chromosome the "CHROM:" line and the
    # whole record go to whichever file was selected last -- behaviour kept
    # from the original; confirm whether STDERR was intended.
    $file_desig = $chromosome;
    select_chromosome_file($file_desig)
        or print "CHROM: $file_desig\n";

    # ---- Sequence ----
    $STS =~ s/\s*//g;
    $STS =~ s/\n//;
    print "\n-D Sequence : $STS\n\n" if ($VAR_D);
    print "Sequence : $STS\n";

    $probe_size =~ s/^0,//;
    $probe_size =~ s/[^0-9]//g;
    print "DNA $STS $probe_size\n";

    $entry_date =~ s/\"//g;
    $entry_date =~ s/\n//g;
    $ACE_entry_date = AceDate($entry_date);
    print "Entry_date \"$ACE_entry_date\"\n";

    $mod_date =~ s/\"//g;
    $mod_date =~ s/\n//g;
    $ACE_mod_date = AceDate($mod_date);
    print "Modification_date \"$ACE_mod_date\"\n";

    # YAC numbers: drop the "0," header and any letters, split on blanks.
    $YACs =~ s/^0,//;
    $YACs =~ s/\n//;
    $YACs =~ s/[a-zA-Z]//g;
    $YACs =~ s/\s+/ /g;
    @YAC_array = split(" ", $YACs);

    # YAC libraries: a list of quoted names separated by '" "'.
    $YAC_library =~ s/\n//g;
    $YAC_library =~ s/^"//g;
    $YAC_library =~ s/"\s*$//g;
    @YAC_lib_array = split('" "', $YAC_library);

    print "Locus $STS\n";

    # Clone name = library name + "_" + YAC number, quoted.  Built once and
    # reused by every section below.
    my @clone_names;
    for my $i (0 .. $#YAC_array) {
        $YAC_array[$i]     =~ s/\s/_/g;
        $YAC_lib_array[$i] =~ s/\s/_/g;
        push @clone_names, "\"$YAC_lib_array[$i]_$YAC_array[$i]\"";
    }
    print "Clone $_\n" for @clone_names;

    # Oligo names are the STS name with ":1" / ":2" inside the closing quote.
    $Oligo1 = $STS;
    $Oligo1 =~ s/"\s*$/:1"/;
    $Oligo2 = $STS;
    $Oligo2 =~ s/"\s*$/:2"/;
    print "Oligo $Oligo1\n";
    print "Oligo $Oligo2\n";

    # $comments carries its own trailing newline; when it is empty the tag
    # is omitted so it cannot fuse with the following line.
    print "Remark $comments" if $comments ne "";
    print "Organism \"Plasmodium falciparum\"\n";
    print "\n\n\n\n";

    # ---- Locus ----
    print "-D Locus : $STS\n\n" if ($VAR_D);
    print "Locus : $STS\n";
    print "Entry_date \"$ACE_entry_date\"\n";
    # The original printed Modification_date twice here; once is enough.
    print "Modification_date \"$ACE_mod_date\"\n";

    $primer1 =~ s/\n//g;
    print "Primer1 $primer1\n";
    $primer2 =~ s/\n//g;
    print "Primer2 $primer2\n";
    print "React_cond $reaction_cond";    # field keeps its input newline

    $PCR_product_size =~ s/^0,//;
    $PCR_product_size =~ s/[^0-9]//g;
    print "Length $PCR_product_size\n";

    print "Positive_clone $_\n" for @clone_names;
    print "Sequence $STS\n";
    print "Remark $comments" if $comments ne "";

    # Map name is the chromosome as a Roman numeral; the position is the
    # number after the last "-" of the STS name (leading zeros dropped).
    # The position is reset per record so that a name without a number does
    # not inherit the previous record's position.
    $map          = stringtoRoman($chromosome);
    $map_position = "";
    if ($STS =~ /-0*([1-9][0-9]*)"/) {
        # May multiply this by appropriate scalar
        $map_position = $1;
    }
    if ($map_position ne "") {
        print "Map \"$map\" Position $map_position\n";
    } else {
        print "Map \"$map\"\n";
    }

    print "Status $status";               # field keeps its input newline
    print "Provisional $provisional";     # field keeps its input newline
    print "Organism \"Plasmodium falciparum\"\n";
    print "\n\n\n\n";

    # ---- DNA ----
    print "\n-D DNA : $STS\n\n" if ($VAR_D);
    print "DNA : $STS\n";
    $probe_sequence =~ s/"//g;
    print $probe_sequence;
    print "\n\n\n\n";

    # ---- Clone (one object per positive YAC) ----
    $YAC_pool =~ s/"//g;
    $YAC_pool =~ s/\n//g;
    @In_pool = split(" ", $YAC_pool);

    $isolates =~ s/\n//g;
    $isolates =~ s/^"//g;
    $isolates =~ s/"\s*$//g;
    @isolate_array = split('" "', $isolates);

    $YAC_insert_sizes =~ s/^0,//;
    $YAC_insert_sizes =~ s/\s*\n//;
    $YAC_insert_sizes =~ s/[a-zA-Z]//g;
    $YAC_insert_sizes =~ s/\s+/ /g;
    @YAC_length_array = split(" ", $YAC_insert_sizes);

    for my $i (0 .. $#clone_names) {
        print "\n-D Clone : $clone_names[$i]\n" if ($VAR_D);
        print "Clone : $clone_names[$i]\n";
        print "YAC\n";
        print "Sequence $STS\n";
        # Pool, isolate and gel length are matched to the clone by index
        # and printed only when that index exists.
        print "In_pool $In_pool[$i]\n" if ($i <= $#In_pool);
        print "Isolate \"$isolate_array[$i]\"\n" if ($i <= $#isolate_array);
        print "Gel_length $YAC_length_array[$i]\n"
            if ($i <= $#YAC_length_array);
        print "Positive_locus $STS\n";
        print "Organism \"Plasmodium falciparum\"\n";
        print "\n\n\n";
    }

    # ---- Oligo1, re. Primer1 ----
    print "-D Oligo : $Oligo1\n\n" if ($VAR_D);
    print "Oligo : $Oligo1\n";
    print "Sequence $primer1\n";
    print "In_sequence $STS\n";
    print "STS $STS\n";
    print "\n\n\n";

    # ---- Oligo2, re. Primer2 ----
    print "-D Oligo : $Oligo2\n\n" if ($VAR_D);
    print "Oligo : $Oligo2\n";
    print "Sequence $primer2\n";
    print "In_sequence $STS\n";
    print "STS $STS\n";
    print "\n\n\n";

    # ---- STS ----
    print "-D STS : $STS\n\n" if ($VAR_D);
    print "STS : $STS\n";
    print "Oligo1 $Oligo1\n";
    print "Oligo2 $Oligo2\n";
    $Grid_data = $STS;
    $Grid_data =~ s/"//g;
    print "Grid_data \"Grid_", $Grid_data, "_data\"\n";
    print "\n\n\n";

    # ---- Grid_data ----
    print "-D Grid_data : \"Grid_", $Grid_data, "_data\"\n\n" if ($VAR_D);
    print "Grid_data : \"Grid_", $Grid_data, "_data\"\n";
    print "Date \"$ACE_entry_date\"\n";
    print "Hybridizes_to $_\n" for @clone_names;
    print "Grid \"Grid_", $Grid_data, "\"\n";
    print "Default_negative\n";
    print "\n\n\n";

    # Store positive clones for each grid that represents an STS probe.
    # This information is used by sub printGrid.
    $Grid_data[$clone_count]  = $Grid_data;     # name of the grid
    $file_desig[$clone_count] = $file_desig;    # file the grid belongs to
    # List of positive clones, terminated by "" (end-of-clones flag).
    $clone[$clone_count] = [ @clone_names, "" ];
    $clone_count++;
}

# Print each Grid for each STS to the relevant file.
# For every stored STS record one Grid object is written to that record's
# chromosome file; its rows are the clone lists of all stored records that
# share the same chromosome, numbered from 1 in storage order.
sub printGrid {
    for my $i (0 .. $clone_count - 1) {
        # Select file for this grid.  (The original tested
        # $file_desig[$j] for chromosome 1, so chromosome-1 grids after the
        # first record went to whichever file was selected before.)
        select_chromosome_file($file_desig[$i]);

        print "-D Grid : \"Grid_", $Grid_data[$i], "\"\n\n" if ($VAR_D);

        # Print out instance of Grid.
        print "Grid : \"Grid_", $Grid_data[$i], "\"\n";
        print "Title \"Grid_", $Grid_data[$i], "\"\n";
        print "Space_at 2 3\n";
        print "View \"Grid_type_1\"\n";

        # Print out rows of all clones, for all STSs,
        # pertaining to the present file.
        my $gridrow = 0;
        for my $j (0 .. $clone_count - 1) {
            next unless $file_desig[$i] eq $file_desig[$j];
            $gridrow++;
            print "Row $gridrow Clone";
            # Clone lists end at the "" terminator stored by print_out.
            for (my $k = 0; $clone[$j][$k] ne ""; $k++) {
                print " $clone[$j][$k]";
            }
            print "\n";
        }
        print "\n\n\n";
    }
}

# Changes input like 29/2/97 to 97-2-29 .
# AceDate(DATE)
#   DATE is a "day/month/year" string.  Returns "year-month-day".
#   All whitespace is removed from DATE first.  Missing parts come out as
#   empty strings (e.g. "" gives "--").  Works on a copy of its argument:
#   neither the caller's variable nor the global $_ is modified (the
#   original stripped whitespace from $_ instead of from its argument).
sub AceDate {
    my ($date) = @_;
    $date = "" unless defined $date;
    $date =~ s/\s//g;

    my ($day, $month, $year) = split("/", $date);
    return join("-", map { defined $_ ? $_ : "" } $year, $month, $day);
}

# Auxillary string to integer function.
#   Treats every character of the argument as a decimal digit
#   (character code minus 48) and returns the positional sum; "" gives 0.
sub strtoint {
    my @digits = split("", $_[0]);
    my $num    = 0;
    for my $i (0 .. $#digits) {
        $num += (10**($#digits - $i)) * (ord($digits[$i]) - 48);
    }
    return $num;
}

# Roman numeral for one decimal digit, given the symbols for one, five and
# ten at that decimal place ("I","V","X" for units; "X","L","C" for tens).
sub _roman_digit {
    my ($digit, $one, $five, $ten) = @_;
    return $one . $ten  if $digit == 9;
    return $one . $five if $digit == 4;
    # ">= 5" so that 5 itself gives the five-symbol (the original used
    # "> 5" and returned an empty string for 5 and 50).
    my $numeral = $digit >= 5 ? $five : "";
    $numeral .= $one x ($digit % 5);
    return $numeral;
}

# Auxillary English numeral to Roman numeral for integers < 100.
#   Takes the number as a string of decimal digits and returns its Roman
#   numeral; only the last two digits (tens and units) are converted.
#   An empty string gives "".
sub stringtoRoman {
    my @digits = reverse split("", $_[0]);    # units first

    my $units = @digits >= 1
        ? _roman_digit(ord($digits[0]) - 48, "I", "V", "X")
        : "";
    my $tens = @digits >= 2
        ? _roman_digit(ord($digits[1]) - 48, "X", "L", "C")
        : "";

    return $tens . $units;
}

exit;