# pastebin

Paste Search Dynamic
Recent pastes
Count Motifs
1. # Insert your Count(Motifs) function here from the last Code Challenge.
2.
3. # Input:  A set of kmers Motifs
4. # Output: Count(Motifs)
def Count(Motifs):
    """Count how often each nucleotide occurs at each position of the motifs.

    Motifs is a list of k-mers; k is taken from the length of the first one.
    Returns a dictionary with the keys "A", "C", "G" and "T", each mapping to
    a list of k integers: count[symbol][j] is the number of motifs whose j-th
    character is symbol.

    An empty list of motifs yields four empty count lists.  A character
    outside "ACGT" raises KeyError, and a motif shorter than the first one
    raises IndexError.
    """
    k = len(Motifs[0]) if Motifs else 0

    # One independent zero-filled row per nucleotide.
    count = {symbol: [0] * k for symbol in "ACGT"}

    for motif in Motifs:
        for j in range(k):
            count[motif[j]][j] += 1

    return count
21.
22.
23. # Input:  A list of kmers Motifs
24. # Output: the profile matrix of Motifs, as a dictionary of lists.
def Profile(Motifs):
    """Return the profile matrix of Motifs as a dictionary of lists.

    The profile is the count matrix produced by Count(Motifs) with every
    entry divided by the number of motifs, so profile[symbol][j] is the
    fraction of motifs that have symbol at position j.
    """
    t = len(Motifs)
    counts = Count(Motifs)

    # True division: the entries are frequencies, not integers.
    return {
        symbol: [occurrences / t for occurrences in row]
        for symbol, row in counts.items()
    }
43.
44.
45. # Input:  A set of kmers Motifs
46. # Output: A consensus string of Motifs.
def Consensus(Motifs):
    """Return the consensus string of Motifs.

    Position j of the result is the nucleotide with the highest count at
    position j according to Count(Motifs).  When several nucleotides share
    the highest count, the first one in the order A, C, G, T is used.
    """
    k = len(Motifs[0])
    count = Count(Motifs)

    # max() returns the first maximal item, which gives the A, C, G, T
    # tie-breaking order described above.
    return "".join(
        max("ACGT", key=lambda symbol: count[symbol][j])
        for j in range(k)
    )
63.
64.
65.
66. # Input:  A set of k-mers Motifs
67. # Output: The score of these k-mers.
def Score(Motifs):
    """Return the score of Motifs.

    The score is the total number of characters, over all motifs and all
    positions, that differ from the character of Consensus(Motifs) at the
    same position.  Lower scores mean the motifs agree more closely.
    """
    consensus = Consensus(Motifs)

    return sum(
        1
        for motif in Motifs
        for j, expected in enumerate(consensus)
        if motif[j] != expected
    )
84.
85.
86.
87. # Input:  String Text and profile matrix Profile
88. # Output: Pr(Text, Profile)
def Pr(Text, Profile):
    """Return the probability of the string Text under the matrix Profile.

    Profile maps each nucleotide to a list of per-position probabilities.
    The result is the product of Profile[Text[j]][j] over every position j
    of Text; an empty Text gives 1.0.
    """
    pr = 1.0
    for j, symbol in enumerate(Text):
        pr *= Profile[symbol][j]

    return pr
99.
100.
101. # Input:  String Text, an integer k, and profile matrix Profile
102. # Output: ProfileMostProbablePattern(Text, k, Profile)
def ProfileMostProbablePattern(Text, k, Profile):
    """Return the k-mer of Text that is most probable under Profile.

    Every k-mer of Text is scored with Pr(k_mer, Profile) and the one with
    the highest probability is returned.  On ties the earliest k-mer wins;
    in particular, when every k-mer has probability zero the first k-mer of
    Text is returned.
    """
    text_length = len(Text)

    # Start from the first k-mer so that an all-zero profile still yields a
    # pattern taken from Text.
    most_probable_pattern = Text[0:k]
    highest_pr = 0.0

    for start in range(text_length - k + 1):
        k_mer = Text[start:start + k]
        pr = Pr(k_mer, Profile)
        # Strict comparison keeps the earliest k-mer among equal scores.
        if pr > highest_pr:
            highest_pr = pr
            most_probable_pattern = k_mer

    return most_probable_pattern
119.
120.
121.
122. # Input:  A list of kmers Dna, and integers k and t (where t is the number of kmers in Dna)
123. # Output: GreedyMotifSearch(Dna, k, t)
def GreedyMotifSearch(Dna, k, t):
    """Greedily search for a low-scoring set of k-mers, one per string.

    Dna is a list of strings, k the motif length and t the number of
    strings of Dna to use.  The search starts from the first k-mer of each
    string.  Then, for every k-mer of Dna[0], it builds a candidate set by
    walking through the remaining strings and picking from each one the
    k-mer that is most probable under the profile of the motifs chosen so
    far.  The candidate set with the lowest Score is returned; on ties the
    earliest one found is kept.
    """
    BestMotifs = [Dna[i][0:k] for i in range(t)]
    # Cache the best score so it is not recomputed on every iteration.
    best_score = Score(BestMotifs)

    # Candidate seeds are all k-mers of the first string.
    n = len(Dna[0])
    for i in range(n - k + 1):
        Motifs = [Dna[0][i:i + k]]
        for j in range(1, t):
            # Motifs holds exactly the j motifs chosen so far.
            P = Profile(Motifs)
            Motifs.append(ProfileMostProbablePattern(Dna[j], k, P))

        candidate_score = Score(Motifs)
        if candidate_score < best_score:
            BestMotifs = Motifs
            best_score = candidate_score

    return BestMotifs
143.
144. """
145. ### DO NOT MODIFY THE CODE BELOW THIS LINE ###
146. import sys
148. k,t = lines[0].split()
149. k = int(k)
150. t = int(t)
151. print('\n'.join(GreedyMotifSearch(lines[1:],k,t)))
152. """
153.
154.
155. import sys
157.
158. k = 12
159. t = 25
160.
161. DNA1='GATTGCTTAGGGCCCCACGAGGCTACGTCTGACTAACGTGACTCCCACCGTTAGTCGACGGATGCGTTGCTTGGGCCTAACCTGCGTATACCCAAGTCCTACCCGGGCCGTAAAGCTCCGAAGCTGTTTGATATTTGTCGAGGGACTAAAGAGGAG'
162. DNA2='GGCCTCTTTTTCACTGGTGGAGTGTTTGCCTGGCGGGGACAGCGATAATAACATAACCTGGCACCAGTGCCCCCACGTTAGCACCTAGCTCGTGTGTGAGGGGAGGTCCCTCAGGATCTACTAAGCTTATCGTGGATACACTGGACCCCTCCGACT'
163. DNA3='AAAAACTGCTTTCCTCATAACCTGAGGGAAACCGGACGCATGACGCGCCCGGCCCACTAGGTGTTTACGTACCGCACTAGGGCATTCTAGCTGTTGCAAGAACTCTGGTGACCGCGGTTCTAAATTGTCCACATAGATCCGACCGACAGGTCTTCT'
164. DNA4='TTACCTAAGTTCTGCCTAGGACATCTACCAGTGTTGGGACGTAACCTGACAGTCCGCAATTACGAGCAGGTTAGGATCGACGCATTGCGATTGGATAGACTGTCGGCGTAGACGAGCTCCTTTATTATGCCACCCCCTTGCTTTGGTAGACGGCTA'
165. DNA5='CATTTAGGGAAACTACTTAACCTGGTGAAGCTCCTAGCCACCGGGTAGTTGCATGCGGGCAGTATCGGAACATGCCGGGTCGTAGGGACAACTCCGCCCTGGATATCCTGGAATCATTCTGCGCGGGCAAAATAGTTTCATCCACACCGCTATCAT'
166. DNA6='CTCTCCTGGCGTTAGCATAACCTGTCTTGATCGTTCGTAGTTTAGGCAGACCGTGGATAAATGGGGTCTTAAACCAGCGAGCGTATACTGCCGAAAAATCGGACGGTCAAATTTTCATGGACGGTGATTTAGACAAACCGAATACTGCTCCAGTAC'
167. DNA7='CATGCGTGACGTAGATAGGAGAAACATTACGTCCATATGAGGGAGACTGTACGCCAGGCCCGCCTTAACCTGTGGTAGGTTTCTGATTATACGCGAACCAGTTTTCTGAAATGGTAAAAAGTAAATAGCGTCGTGGTTGTCTGTCAATCGCCAGTC'
168. DNA8='TTCAGTGACCCTACTGGAGAAGCTGAGCTTATGATAAGCACACGCCCGTCGGATACGATTAAGACCGACGTCTTCCATAACCTGTGCGTCCGTTCCTCACCGTCTACGAGAGGGCTCACGTTATTTTCTGCGCCCCTTCGAAATTTTGTGGAAGGA'
169. DNA9='CGGTAAGATCTATGACTCTAGTACAGCGGGAGTTTCGTGTGTGACAGATTTCAGGTCACAAGACATACATAAAAGTCTATCTCTCGTCTGCCTCCCACTAGGATCATCACACCTAACCTGAATTAATGGTTAGCTCCAGGATCAAACGGGCCAGGG'
170. DNA10='GTTCTATAGTCTAGTAACGAAGGACATGATAAGAACGCTCGACTCAAGACTTCGTTGAGCGGTTAAATCAGAACACAGACGTCCTGATCTAAAACTAGCGGAAAGCATCCCCTTAACCTGCACGGGGGAGACCCTACGAATCTACATGGCAAAAAT'
171. DNA11='GAGATCCATCCAATCTCCGATTCGTTACTTAACCTGTGCGTTGAACCGTACGTAATCACTACTCTAACCTATCACCTGTACGATCGACTTTCCCTCCATACCCCGGCGTACCGGTCATATCGATCCAAGGTCTGCAACCTTTAGGTCGTGAGCATC'
172. DNA12='TTCTGTACCCAAGACAGGATTTTTCTTTCCCCAACAAGAGCCTCGGAGGGGACCCGCCATGATGGACCGCGTAGGAGCGATCCGTATCACCATCAGGGCTCCCACAGGCGACATAACCTGATATTTACCCCTAGGATAGGATCCCACAAATTGCGC'
173. DNA13='CGGGTCGACCTGAGTACGAGATTACTACCGAATGGATGTGCGTCTCATATGCCATTACGCTTGGGGTGACTTCTCCTCGGCCACTACACAGCCTCGTGTCCTAACCTGGGAGCCCTTCCTTCTAATGGCGTCGACGCACCGTAATGCGTTTGTCTT'
174. DNA14='GACGCTTATGTTAGTTATGTATAGGGTTTGCATCGTTCTAGAGGAGGCAGGCAAGATCGGCGCGCGCGATTTCGGAAAGCTCTTATACAAACCCCAATCTAAGACCCCACACGAAGAGTTAGCCGTAACCTGTTCGACCAAGTAGCGAAGTACAAT'
175. DNA15='TAGAAAAGGCGTCATGACTATTCGTCACCGGAAATAGAGTGGGGTTTCACTTCCAAGGCTCAGCAGAATGCGAACTGTGTAACTAGGGGCATTAGGTCACATAACCTGCATCGCTGGGCGTGACCGCAGTACAACAATTTCCCCCTCACCTGGTTG'
176. DNA16='CCAAAATGAGTATCACTTAACCTGTGGCGGACGAGTATGAGCAGTTCGACCACGGGTAAAACATGATCTCTGTCATACTCCTCAATGGTCCGCTAGGTCCAGAAGATCATCACGGCGTAGCGGATCGGGCTATCTTACTATATTCAGCACCGACGA'
177. DNA17='GGCCCTAACCTGCATTGGCCAAGGGAAGCCTACGGGTAACCAACGAAAATCGACTCTTCCACCGGCGGTCTTTGGGCGAGTGACCGGTAAATCGACTTTTAGAGGCGCACCCTAGGGCCCGTGGTCGGTATCTAATGTATTCTCGAGCGTTTTACC'
178. DNA18='GAGGTGCATCCTTTTGGACCACTTATGCTTAACCTGGATCCCAGGCATCAAATGGCCAGTCACCGATTCAGGGCCGAACCAAAGCACCCCCTTAACTGGGTCGATTTTCCCGGTGGACTACTAACATTCACATCACCTAATGTGGAGCGTCACCGC'
179. DNA19='CATTCCGGGGTGGTGTATTAATCACTCCTCCGGGTAGTTCCCCAAGCTTACGCTTCGCGTTAACACGATTCACGGAGACGGACCATTGGTTTGAAAGTCGCTATCACCGTTTGACTGTGGACGAGAACAAGACTTATCCATCGGACACTTAACCTG'
180. DNA20='GGAAGATCTAATGTGCTACCGACTTGAATATTTCGCGATACTCAACCTGAAGCAAGAAATTAACTTAACCTGGAGGTTGACGTAAGGCTATGCAAATTCCATGAATCACACAGAGCCCATTGCCGTGGATCCGACGATTACTTATGGTCGCGGGGA'
181. DNA21='TCACGTAACCTGGACCAATGCCGACCTAAGTTCGGTGACCAAGGGCTTGACCCCGCTCTGAGGTTGGCTTCTAGCCACTCAACGGGGTTTGTGCCTTAAAACTACGAAATCCTTCACCTGGGGTTCTCGAGCTTGTGATCGGTGTACGCAAAATGA'
182. DNA22='ACGGTGAACACACCATAATTATGGTAAACCGGTAAAGCCCCTAACCTGGTAACATGCAACATTCAACACCTGCTGAGTCGCGAGCACAGGTTGCCGGCTCTTGTCCGATCGCGTATGGGTGAGAAGAACTCCCTGTGAGTAAATGAATAGATGGAC'
183. DNA23='GTCAATCGCCATAACCGGCGGGTCTGTCTTCGAGCGGCTTGAATTACGCTAGACGGTACTACCCCCTATCGCCCAAAACGTCGTGCGCACTCAGGGTATAACAGCTTCAATCGATGGGCCTGCAACCGTTTCCCCCGTAACCTGCTGTACCGTCCT'
184. DNA24='AAGCCTAACCTGATTTGGCTACGCAGGCACGCCGTTCAGGCACCGGGTATTCTGCAACATTCCGGTCCAACGATAGAGGTGTTCCGAGATGTGTGGTGTCTTACTAGTATTAGTGGACCTGGGATGAACGAATTCCATTGCAACCGATAACCGGAG'
185. DNA25='TTACACCTCCATTCCGTAACCGAGTGGCACCCACCCTGGATTTATTTCTTCTCGAGTGAGCCTACGTTAGCCCCAACCACTTTTTTAAACTATCCATTACTTAACCTGACCTCAAGCCTTTCTCACCCTACAGCCGGACCAGGCGCGCTTAAGAAT'
186.
# Collect the sample sequences in order; GreedyMotifSearch reads them by index.
Dna = [
    DNA1, DNA2, DNA3, DNA4, DNA5,
    DNA6, DNA7, DNA8, DNA9, DNA10,
    DNA11, DNA12, DNA13, DNA14, DNA15,
    DNA16, DNA17, DNA18, DNA19, DNA20,
    DNA21, DNA22, DNA23, DNA24, DNA25,
]

# Print one motif per line.  The separator must be the newline escape '\n',
# not the bare letter 'n', otherwise the motifs are glued into one string.
print('\n'.join(GreedyMotifSearch(Dna, k, t)))
Parsed in 0.138 seconds