pastebin

Paste Search Dynamic
Recent pastes
Count Motifs
  1. # Insert your Count(Motifs) function here from the last Code Challenge.
  2.  
  3. # Input:  A set of kmers Motifs
  4. # Output: Count(Motifs)
  5. def Count(Motifs):
  6.     count = {} # initializing the count dictionary
  7.     # your code here
  8.     k = len(Motifs[0])
  9.     for symbol in "ACGT":
  10.         count[symbol] = []
  11.         for j in range(k):
  12.             count[symbol].append(0)
  13.              
  14.     t = len(Motifs)
  15.     for i in range(t):
  16.         for j in range(k):
  17.             symbol = Motifs[i][j]
  18.             count[symbol][j] += 1
  19.                        
  20.     return count
  21.  
  22.  
  23. # Input:  A list of kmers Motifs
  24. # Output: the profile matrix of Motifs, as a dictionary of lists.
  25. def Profile(Motifs):
  26.     t = len(Motifs)
  27.     k = len(Motifs[0])
  28.     profile = {}
  29.     # insert your code here
  30.     profile = Count(Motifs)
  31.    
  32.     # dic.items() returns pair list of key and value
  33.     #list = profile.items()
  34.    
  35.     # to get value list, dic.values() needs to be used
  36.     stringList = profile.values()
  37.    
  38.     for string in stringList:
  39.         for i in range(len(string)):
  40.             string[i] = string[i] / t
  41.    
  42.     return profile
  43.  
  44.  
  45. # Input:  A set of kmers Motifs
  46. # Output: A consensus string of Motifs.
  47. def Consensus(Motifs):
  48.     # insert your code here
  49.     k = len(Motifs[0])
  50.     count = Count(Motifs)
  51.  
  52.     consensus = ""
  53.     for j in range(k):
  54.         m = 0
  55.         frequentSymbol = ""
  56.         for symbol in "ACGT":
  57.             if count[symbol][j] > m:
  58.                 m = count[symbol][j]
  59.                 frequentSymbol = symbol
  60.         consensus += frequentSymbol
  61.  
  62.     return consensus
  63.      
  64.      
  65.      
  66. # Input:  A set of k-mers Motifs
  67. # Output: The score of these k-mers.
  68. def Score(Motifs):
  69.     # Insert code here
  70.     t = len(Motifs)
  71.     k = len(Motifs[0])
  72.    
  73.     # get consensus string
  74.     consensus = Consensus(Motifs)
  75.    
  76.     score = 0
  77.     for j in range(k):
  78.         for i in range(t):
  79.             symbol = Motifs[i][j]
  80.             if consensus[j] != symbol:
  81.                 score += 1    
  82.    
  83.     return score
  84.    
  85.  
  86.  
  87. # Input:  String Text and profile matrix Profile
  88. # Output: Pr(Text, Profile)
  89. def Pr(Text, Profile):
  90.     # insert your code here
  91.     k = len(Text)
  92.     pr = 1.0
  93.     for j in range(k):
  94.         symbol = Text[j]
  95.         prList = Profile[symbol]
  96.         pr *= prList[j]
  97.    
  98.     return pr
  99.    
  100.    
  101. # Input:  String Text, an integer k, and profile matrix Profile
  102. # Output: ProfileMostProbablePattern(Text, k, Profile)
  103. def ProfileMostProbablePattern(Text, k, Profile):
  104.     # insert your code here. Make sure to use Pr(Text, Profile) as a subroutine!
  105.     l = len(Text)
  106.    
  107.     pr_most = 0.0
  108.     mostProbablePattern = Text[0:k]
  109.    
  110.     for j in range(l-k+1):
  111.         k_mer = Text[j:j+k]
  112.         #print (k_mer)
  113.         pr = Pr(k_mer, Profile)
  114.         if pr > pr_most:
  115.             pr_most = pr
  116.             mostProbablePattern = k_mer
  117.    
  118.     return mostProbablePattern    
  119.    
  120.    
  121.    
  122. # Input:  A list of kmers Dna, and integers k and t (where t is the number of kmers in Dna)
  123. # Output: GreedyMotifSearch(Dna, k, t)
  124. def GreedyMotifSearch(Dna, k, t):
  125.     # type your GreedyMotifSearch code here.
  126.     BestMotifs = []
  127.     for i in range(0, t):
  128.         BestMotifs.append(Dna[i][0:k])
  129.        
  130.     # length of the first Dna string
  131.     n = len(Dna[0])
  132.     for i in range(n-k+1):
  133.         Motifs = []
  134.         Motifs.append(Dna[0][i:i+k])
  135.         for j in range(1, t):
  136.             P = Profile(Motifs[0:j])
  137.             Motifs.append(ProfileMostProbablePattern(Dna[j], k, P))      
  138.  
  139.         if Score(Motifs) < Score(BestMotifs):
  140.             BestMotifs = Motifs
  141.            
  142.     return BestMotifs
  143.            
  144. """
  145. ### DO NOT MODIFY THE CODE BELOW THIS LINE ###
  146. import sys
  147. lines = sys.stdin.read().splitlines()
  148. k,t = lines[0].split()
  149. k = int(k)
  150. t = int(t)
  151. print('n'.join(GreedyMotifSearch(lines[1:],k,t)))
  152. """
  153.  
  154.  
  155. import sys
  156. lines = sys.stdin.read().splitlines()
  157.  
  158. k = 12
  159. t = 25
  160.  
  161. DNA1='GATTGCTTAGGGCCCCACGAGGCTACGTCTGACTAACGTGACTCCCACCGTTAGTCGACGGATGCGTTGCTTGGGCCTAACCTGCGTATACCCAAGTCCTACCCGGGCCGTAAAGCTCCGAAGCTGTTTGATATTTGTCGAGGGACTAAAGAGGAG'
  162. DNA2='GGCCTCTTTTTCACTGGTGGAGTGTTTGCCTGGCGGGGACAGCGATAATAACATAACCTGGCACCAGTGCCCCCACGTTAGCACCTAGCTCGTGTGTGAGGGGAGGTCCCTCAGGATCTACTAAGCTTATCGTGGATACACTGGACCCCTCCGACT'
  163. DNA3='AAAAACTGCTTTCCTCATAACCTGAGGGAAACCGGACGCATGACGCGCCCGGCCCACTAGGTGTTTACGTACCGCACTAGGGCATTCTAGCTGTTGCAAGAACTCTGGTGACCGCGGTTCTAAATTGTCCACATAGATCCGACCGACAGGTCTTCT'
  164. DNA4='TTACCTAAGTTCTGCCTAGGACATCTACCAGTGTTGGGACGTAACCTGACAGTCCGCAATTACGAGCAGGTTAGGATCGACGCATTGCGATTGGATAGACTGTCGGCGTAGACGAGCTCCTTTATTATGCCACCCCCTTGCTTTGGTAGACGGCTA'
  165. DNA5='CATTTAGGGAAACTACTTAACCTGGTGAAGCTCCTAGCCACCGGGTAGTTGCATGCGGGCAGTATCGGAACATGCCGGGTCGTAGGGACAACTCCGCCCTGGATATCCTGGAATCATTCTGCGCGGGCAAAATAGTTTCATCCACACCGCTATCAT'
  166. DNA6='CTCTCCTGGCGTTAGCATAACCTGTCTTGATCGTTCGTAGTTTAGGCAGACCGTGGATAAATGGGGTCTTAAACCAGCGAGCGTATACTGCCGAAAAATCGGACGGTCAAATTTTCATGGACGGTGATTTAGACAAACCGAATACTGCTCCAGTAC'
  167. DNA7='CATGCGTGACGTAGATAGGAGAAACATTACGTCCATATGAGGGAGACTGTACGCCAGGCCCGCCTTAACCTGTGGTAGGTTTCTGATTATACGCGAACCAGTTTTCTGAAATGGTAAAAAGTAAATAGCGTCGTGGTTGTCTGTCAATCGCCAGTC'
  168. DNA8='TTCAGTGACCCTACTGGAGAAGCTGAGCTTATGATAAGCACACGCCCGTCGGATACGATTAAGACCGACGTCTTCCATAACCTGTGCGTCCGTTCCTCACCGTCTACGAGAGGGCTCACGTTATTTTCTGCGCCCCTTCGAAATTTTGTGGAAGGA'
  169. DNA9='CGGTAAGATCTATGACTCTAGTACAGCGGGAGTTTCGTGTGTGACAGATTTCAGGTCACAAGACATACATAAAAGTCTATCTCTCGTCTGCCTCCCACTAGGATCATCACACCTAACCTGAATTAATGGTTAGCTCCAGGATCAAACGGGCCAGGG'
  170. DNA10='GTTCTATAGTCTAGTAACGAAGGACATGATAAGAACGCTCGACTCAAGACTTCGTTGAGCGGTTAAATCAGAACACAGACGTCCTGATCTAAAACTAGCGGAAAGCATCCCCTTAACCTGCACGGGGGAGACCCTACGAATCTACATGGCAAAAAT'
  171. DNA11='GAGATCCATCCAATCTCCGATTCGTTACTTAACCTGTGCGTTGAACCGTACGTAATCACTACTCTAACCTATCACCTGTACGATCGACTTTCCCTCCATACCCCGGCGTACCGGTCATATCGATCCAAGGTCTGCAACCTTTAGGTCGTGAGCATC'
  172. DNA12='TTCTGTACCCAAGACAGGATTTTTCTTTCCCCAACAAGAGCCTCGGAGGGGACCCGCCATGATGGACCGCGTAGGAGCGATCCGTATCACCATCAGGGCTCCCACAGGCGACATAACCTGATATTTACCCCTAGGATAGGATCCCACAAATTGCGC'
  173. DNA13='CGGGTCGACCTGAGTACGAGATTACTACCGAATGGATGTGCGTCTCATATGCCATTACGCTTGGGGTGACTTCTCCTCGGCCACTACACAGCCTCGTGTCCTAACCTGGGAGCCCTTCCTTCTAATGGCGTCGACGCACCGTAATGCGTTTGTCTT'
  174. DNA14='GACGCTTATGTTAGTTATGTATAGGGTTTGCATCGTTCTAGAGGAGGCAGGCAAGATCGGCGCGCGCGATTTCGGAAAGCTCTTATACAAACCCCAATCTAAGACCCCACACGAAGAGTTAGCCGTAACCTGTTCGACCAAGTAGCGAAGTACAAT'
  175. DNA15='TAGAAAAGGCGTCATGACTATTCGTCACCGGAAATAGAGTGGGGTTTCACTTCCAAGGCTCAGCAGAATGCGAACTGTGTAACTAGGGGCATTAGGTCACATAACCTGCATCGCTGGGCGTGACCGCAGTACAACAATTTCCCCCTCACCTGGTTG'
  176. DNA16='CCAAAATGAGTATCACTTAACCTGTGGCGGACGAGTATGAGCAGTTCGACCACGGGTAAAACATGATCTCTGTCATACTCCTCAATGGTCCGCTAGGTCCAGAAGATCATCACGGCGTAGCGGATCGGGCTATCTTACTATATTCAGCACCGACGA'
  177. DNA17='GGCCCTAACCTGCATTGGCCAAGGGAAGCCTACGGGTAACCAACGAAAATCGACTCTTCCACCGGCGGTCTTTGGGCGAGTGACCGGTAAATCGACTTTTAGAGGCGCACCCTAGGGCCCGTGGTCGGTATCTAATGTATTCTCGAGCGTTTTACC'
  178. DNA18='GAGGTGCATCCTTTTGGACCACTTATGCTTAACCTGGATCCCAGGCATCAAATGGCCAGTCACCGATTCAGGGCCGAACCAAAGCACCCCCTTAACTGGGTCGATTTTCCCGGTGGACTACTAACATTCACATCACCTAATGTGGAGCGTCACCGC'
  179. DNA19='CATTCCGGGGTGGTGTATTAATCACTCCTCCGGGTAGTTCCCCAAGCTTACGCTTCGCGTTAACACGATTCACGGAGACGGACCATTGGTTTGAAAGTCGCTATCACCGTTTGACTGTGGACGAGAACAAGACTTATCCATCGGACACTTAACCTG'
  180. DNA20='GGAAGATCTAATGTGCTACCGACTTGAATATTTCGCGATACTCAACCTGAAGCAAGAAATTAACTTAACCTGGAGGTTGACGTAAGGCTATGCAAATTCCATGAATCACACAGAGCCCATTGCCGTGGATCCGACGATTACTTATGGTCGCGGGGA'
  181. DNA21='TCACGTAACCTGGACCAATGCCGACCTAAGTTCGGTGACCAAGGGCTTGACCCCGCTCTGAGGTTGGCTTCTAGCCACTCAACGGGGTTTGTGCCTTAAAACTACGAAATCCTTCACCTGGGGTTCTCGAGCTTGTGATCGGTGTACGCAAAATGA'
  182. DNA22='ACGGTGAACACACCATAATTATGGTAAACCGGTAAAGCCCCTAACCTGGTAACATGCAACATTCAACACCTGCTGAGTCGCGAGCACAGGTTGCCGGCTCTTGTCCGATCGCGTATGGGTGAGAAGAACTCCCTGTGAGTAAATGAATAGATGGAC'
  183. DNA23='GTCAATCGCCATAACCGGCGGGTCTGTCTTCGAGCGGCTTGAATTACGCTAGACGGTACTACCCCCTATCGCCCAAAACGTCGTGCGCACTCAGGGTATAACAGCTTCAATCGATGGGCCTGCAACCGTTTCCCCCGTAACCTGCTGTACCGTCCT'
  184. DNA24='AAGCCTAACCTGATTTGGCTACGCAGGCACGCCGTTCAGGCACCGGGTATTCTGCAACATTCCGGTCCAACGATAGAGGTGTTCCGAGATGTGTGGTGTCTTACTAGTATTAGTGGACCTGGGATGAACGAATTCCATTGCAACCGATAACCGGAG'
  185. DNA25='TTACACCTCCATTCCGTAACCGAGTGGCACCCACCCTGGATTTATTTCTTCTCGAGTGAGCCTACGTTAGCCCCAACCACTTTTTTAAACTATCCATTACTTAACCTGACCTCAAGCCTTTCTCACCCTACAGCCGGACCAGGCGCGCTTAAGAAT'
  186.  
  187. Dna = [ DNA1, DNA2, DNA3, DNA4, DNA5, DNA6, DNA7, DNA8, DNA9, DNA10, DNA11, DNA12, DNA13, DNA14, DNA15, DNA16, DNA17, DNA18, DNA19, DNA20, DNA21, DNA22, DNA23, DNA24, DNA25 ]
  188.  
  189. print('n'.join(GreedyMotifSearch(Dna,k,t)))
Parsed in 0.138 seconds