# Insert your Count(Motifs) function here from the last Code Challenge.
# Input: A set of kmers Motifs
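# Output: Count(Motifs), the count matrix of Motifs as a dictionary of lists (one list of per-position counts for each of "A", "C", "G", "T").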
def Count(Motifs):
    """Build the count matrix of a collection of motifs.

    Args:
        Motifs: A sequence of strings over the alphabet "ACGT". The length
            of the first string fixes the number of columns k.

    Returns:
        A dict with the keys "A", "C", "G", "T" (inserted in that order).
        Each value is a list of k integers whose entry j is the number of
        motifs that carry that symbol at position j. When ``Motifs`` is
        empty, every list is empty.

    Raises:
        KeyError: If a motif contains a symbol outside "ACGT".
        IndexError: If a motif is shorter than the first one.
    """
    # With no motifs there are no columns; avoid indexing Motifs[0].
    k = len(Motifs[0]) if Motifs else 0
    # One independent zero-filled row per nucleotide.
    count = {symbol: [0] * k for symbol in "ACGT"}
    for motif in Motifs:
        # Only the first k positions are tallied, as fixed by Motifs[0].
        for j in range(k):
            count[motif[j]][j] += 1
    return count
# Input: A list of kmers Motifs
# Output: the profile matrix of Motifs, as a dictionary of lists.
def Profile(Motifs):
    """Return the profile matrix of Motifs as a dictionary of lists.

    Every entry of Count(Motifs) is divided by the number of motifs, so
    the list stored under a symbol holds, for each position, that symbol's
    count at the position divided by len(Motifs).
    """
    motif_total = len(Motifs)
    counts = Count(Motifs)
    # Same keys and order as the count matrix, with each count scaled.
    return {
        symbol: [tally / motif_total for tally in column_counts]
        for symbol, column_counts in counts.items()
    }
# Input: A set of kmers Motifs
# Output: A consensus string of Motifs.
def Consensus(Motifs):
    """Return the consensus string of Motifs.

    Position j of the result is the symbol with the largest count in
    column j of Count(Motifs). When several symbols tie, the one that
    appears first in "ACGT" is chosen.
    """
    width = len(Motifs[0])
    tallies = Count(Motifs)
    # max() keeps the first maximal item, which gives the "ACGT"-order
    # tie-break.
    return "".join(
        max("ACGT", key=lambda base: tallies[base][column])
        for column in range(width)
    )
# Input: A set of k-mers Motifs
# Output: The score of these k-mers.
def Score(Motifs):
    """Return the score of Motifs.

    The score is the total number of symbols, over the first len(Motifs[0])
    positions of every motif, that differ from Consensus(Motifs) at the
    same position.
    """
    width = len(Motifs[0])
    reference = Consensus(Motifs)
    # Each comparison is a bool; summing them counts the mismatches.
    return sum(
        motif[position] != reference[position]
        for motif in Motifs
        for position in range(width)
    )
# Input: String Text and profile matrix Profile
# Output: Pr(Text, Profile), the product of the Profile entries selected by each symbol of Text at its position.
def Pr(Text, Profile):
    """Return the probability of Text under the profile matrix Profile.

    The result is the product, over every position j of Text, of
    Profile[Text[j]][j]. An empty Text yields 1.0.
    """
    probability = 1.0
    for position, base in enumerate(Text):
        probability *= Profile[base][position]
    return probability
# Input: String Text, an integer k, and profile matrix Profile
# Output: ProfileMostProbablePattern(Text, k, Profile), the k-mer of Text with the highest Pr(k-mer, Profile); the leftmost such k-mer wins ties.
def ProfileMostProbablePattern(Text, k, Profile):
    """Return the k-mer of Text that is most probable under Profile.

    Every length-k window of Text is scored with Pr, scanning left to
    right. A window replaces the current best only when its probability is
    strictly greater, so the leftmost window wins ties. If no window has a
    probability above zero, the first k characters of Text are returned.
    """
    best_kmer = Text[:k]
    best_probability = 0.0
    windows = (Text[start:start + k] for start in range(len(Text) - k + 1))
    for window in windows:
        probability = Pr(window, Profile)
        if probability > best_probability:
            best_kmer, best_probability = window, probability
    return best_kmer
# Input: A list of strings Dna, and integers k and t (where t is the number of strings in Dna)
# Output: GreedyMotifSearch(Dna, k, t), a list of t k-mers (one taken from each string in Dna)
def GreedyMotifSearch(Dna, k, t):
    """Greedily search for a low-scoring collection of k-mers in Dna.

    Each k-mer of Dna[0] is tried as a seed. For every later string, the
    k-mer most probable under the profile of the motifs chosen so far is
    appended. The collection with the lowest Score is kept; a candidate
    replaces the current best only when its score is strictly lower.

    Args:
        Dna: A list of strings; the first t entries are used.
        k: Length of the motifs to find.
        t: Number of strings of Dna to take a motif from.

    Returns:
        A list of t k-mers, one from each of the first t strings of Dna.
        The search starts from the first k characters of every string.
    """
    best_motifs = [Dna[i][:k] for i in range(t)]
    last_start = len(Dna[0]) - k
    if last_start < 0:
        # Dna[0] has no full-length window, so there is nothing to try.
        return best_motifs
    # Cache the best score instead of recomputing it on every iteration.
    best_score = Score(best_motifs)
    for start in range(last_start + 1):
        motifs = [Dna[0][start:start + k]]
        for j in range(1, t):
            # motifs holds exactly j entries here, so no slice is needed.
            profile = Profile(motifs)
            motifs.append(ProfileMostProbablePattern(Dna[j], k, profile))
        score = Score(motifs)
        if score < best_score:
            best_motifs, best_score = motifs, score
    return best_motifs
"""
### DO NOT MODIFY THE CODE BELOW THIS LINE ###
import sys
lines = sys.stdin.read().splitlines()
k,t = lines[0].split()
k = int(k)
t = int(t)
print('n'.join(GreedyMotifSearch(lines[1:],k,t)))
"""
import sys
# NOTE(review): `lines` is never used below. The read is kept so the script
# still consumes stdin as before, but it waits for EOF when stdin is a terminal.
lines = sys.stdin.read().splitlines()
# Motif length and number of DNA strings for the hard-coded dataset below.
k = 12
t = 25
DNA1='GATTGCTTAGGGCCCCACGAGGCTACGTCTGACTAACGTGACTCCCACCGTTAGTCGACGGATGCGTTGCTTGGGCCTAACCTGCGTATACCCAAGTCCTACCCGGGCCGTAAAGCTCCGAAGCTGTTTGATATTTGTCGAGGGACTAAAGAGGAG'
DNA2='GGCCTCTTTTTCACTGGTGGAGTGTTTGCCTGGCGGGGACAGCGATAATAACATAACCTGGCACCAGTGCCCCCACGTTAGCACCTAGCTCGTGTGTGAGGGGAGGTCCCTCAGGATCTACTAAGCTTATCGTGGATACACTGGACCCCTCCGACT'
DNA3='AAAAACTGCTTTCCTCATAACCTGAGGGAAACCGGACGCATGACGCGCCCGGCCCACTAGGTGTTTACGTACCGCACTAGGGCATTCTAGCTGTTGCAAGAACTCTGGTGACCGCGGTTCTAAATTGTCCACATAGATCCGACCGACAGGTCTTCT'
DNA4='TTACCTAAGTTCTGCCTAGGACATCTACCAGTGTTGGGACGTAACCTGACAGTCCGCAATTACGAGCAGGTTAGGATCGACGCATTGCGATTGGATAGACTGTCGGCGTAGACGAGCTCCTTTATTATGCCACCCCCTTGCTTTGGTAGACGGCTA'
DNA5='CATTTAGGGAAACTACTTAACCTGGTGAAGCTCCTAGCCACCGGGTAGTTGCATGCGGGCAGTATCGGAACATGCCGGGTCGTAGGGACAACTCCGCCCTGGATATCCTGGAATCATTCTGCGCGGGCAAAATAGTTTCATCCACACCGCTATCAT'
DNA6='CTCTCCTGGCGTTAGCATAACCTGTCTTGATCGTTCGTAGTTTAGGCAGACCGTGGATAAATGGGGTCTTAAACCAGCGAGCGTATACTGCCGAAAAATCGGACGGTCAAATTTTCATGGACGGTGATTTAGACAAACCGAATACTGCTCCAGTAC'
DNA7='CATGCGTGACGTAGATAGGAGAAACATTACGTCCATATGAGGGAGACTGTACGCCAGGCCCGCCTTAACCTGTGGTAGGTTTCTGATTATACGCGAACCAGTTTTCTGAAATGGTAAAAAGTAAATAGCGTCGTGGTTGTCTGTCAATCGCCAGTC'
DNA8='TTCAGTGACCCTACTGGAGAAGCTGAGCTTATGATAAGCACACGCCCGTCGGATACGATTAAGACCGACGTCTTCCATAACCTGTGCGTCCGTTCCTCACCGTCTACGAGAGGGCTCACGTTATTTTCTGCGCCCCTTCGAAATTTTGTGGAAGGA'
DNA9='CGGTAAGATCTATGACTCTAGTACAGCGGGAGTTTCGTGTGTGACAGATTTCAGGTCACAAGACATACATAAAAGTCTATCTCTCGTCTGCCTCCCACTAGGATCATCACACCTAACCTGAATTAATGGTTAGCTCCAGGATCAAACGGGCCAGGG'
DNA10='GTTCTATAGTCTAGTAACGAAGGACATGATAAGAACGCTCGACTCAAGACTTCGTTGAGCGGTTAAATCAGAACACAGACGTCCTGATCTAAAACTAGCGGAAAGCATCCCCTTAACCTGCACGGGGGAGACCCTACGAATCTACATGGCAAAAAT'
DNA11='GAGATCCATCCAATCTCCGATTCGTTACTTAACCTGTGCGTTGAACCGTACGTAATCACTACTCTAACCTATCACCTGTACGATCGACTTTCCCTCCATACCCCGGCGTACCGGTCATATCGATCCAAGGTCTGCAACCTTTAGGTCGTGAGCATC'
DNA12='TTCTGTACCCAAGACAGGATTTTTCTTTCCCCAACAAGAGCCTCGGAGGGGACCCGCCATGATGGACCGCGTAGGAGCGATCCGTATCACCATCAGGGCTCCCACAGGCGACATAACCTGATATTTACCCCTAGGATAGGATCCCACAAATTGCGC'
DNA13='CGGGTCGACCTGAGTACGAGATTACTACCGAATGGATGTGCGTCTCATATGCCATTACGCTTGGGGTGACTTCTCCTCGGCCACTACACAGCCTCGTGTCCTAACCTGGGAGCCCTTCCTTCTAATGGCGTCGACGCACCGTAATGCGTTTGTCTT'
DNA14='GACGCTTATGTTAGTTATGTATAGGGTTTGCATCGTTCTAGAGGAGGCAGGCAAGATCGGCGCGCGCGATTTCGGAAAGCTCTTATACAAACCCCAATCTAAGACCCCACACGAAGAGTTAGCCGTAACCTGTTCGACCAAGTAGCGAAGTACAAT'
DNA15='TAGAAAAGGCGTCATGACTATTCGTCACCGGAAATAGAGTGGGGTTTCACTTCCAAGGCTCAGCAGAATGCGAACTGTGTAACTAGGGGCATTAGGTCACATAACCTGCATCGCTGGGCGTGACCGCAGTACAACAATTTCCCCCTCACCTGGTTG'
DNA16='CCAAAATGAGTATCACTTAACCTGTGGCGGACGAGTATGAGCAGTTCGACCACGGGTAAAACATGATCTCTGTCATACTCCTCAATGGTCCGCTAGGTCCAGAAGATCATCACGGCGTAGCGGATCGGGCTATCTTACTATATTCAGCACCGACGA'
DNA17='GGCCCTAACCTGCATTGGCCAAGGGAAGCCTACGGGTAACCAACGAAAATCGACTCTTCCACCGGCGGTCTTTGGGCGAGTGACCGGTAAATCGACTTTTAGAGGCGCACCCTAGGGCCCGTGGTCGGTATCTAATGTATTCTCGAGCGTTTTACC'
DNA18='GAGGTGCATCCTTTTGGACCACTTATGCTTAACCTGGATCCCAGGCATCAAATGGCCAGTCACCGATTCAGGGCCGAACCAAAGCACCCCCTTAACTGGGTCGATTTTCCCGGTGGACTACTAACATTCACATCACCTAATGTGGAGCGTCACCGC'
DNA19='CATTCCGGGGTGGTGTATTAATCACTCCTCCGGGTAGTTCCCCAAGCTTACGCTTCGCGTTAACACGATTCACGGAGACGGACCATTGGTTTGAAAGTCGCTATCACCGTTTGACTGTGGACGAGAACAAGACTTATCCATCGGACACTTAACCTG'
DNA20='GGAAGATCTAATGTGCTACCGACTTGAATATTTCGCGATACTCAACCTGAAGCAAGAAATTAACTTAACCTGGAGGTTGACGTAAGGCTATGCAAATTCCATGAATCACACAGAGCCCATTGCCGTGGATCCGACGATTACTTATGGTCGCGGGGA'
DNA21='TCACGTAACCTGGACCAATGCCGACCTAAGTTCGGTGACCAAGGGCTTGACCCCGCTCTGAGGTTGGCTTCTAGCCACTCAACGGGGTTTGTGCCTTAAAACTACGAAATCCTTCACCTGGGGTTCTCGAGCTTGTGATCGGTGTACGCAAAATGA'
DNA22='ACGGTGAACACACCATAATTATGGTAAACCGGTAAAGCCCCTAACCTGGTAACATGCAACATTCAACACCTGCTGAGTCGCGAGCACAGGTTGCCGGCTCTTGTCCGATCGCGTATGGGTGAGAAGAACTCCCTGTGAGTAAATGAATAGATGGAC'
DNA23='GTCAATCGCCATAACCGGCGGGTCTGTCTTCGAGCGGCTTGAATTACGCTAGACGGTACTACCCCCTATCGCCCAAAACGTCGTGCGCACTCAGGGTATAACAGCTTCAATCGATGGGCCTGCAACCGTTTCCCCCGTAACCTGCTGTACCGTCCT'
DNA24='AAGCCTAACCTGATTTGGCTACGCAGGCACGCCGTTCAGGCACCGGGTATTCTGCAACATTCCGGTCCAACGATAGAGGTGTTCCGAGATGTGTGGTGTCTTACTAGTATTAGTGGACCTGGGATGAACGAATTCCATTGCAACCGATAACCGGAG'
DNA25='TTACACCTCCATTCCGTAACCGAGTGGCACCCACCCTGGATTTATTTCTTCTCGAGTGAGCCTACGTTAGCCCCAACCACTTTTTTAAACTATCCATTACTTAACCTGACCTCAAGCCTTTCTCACCCTACAGCCGGACCAGGCGCGCTTAAGAAT'
Dna = [ DNA1, DNA2, DNA3, DNA4, DNA5, DNA6, DNA7, DNA8, DNA9, DNA10, DNA11, DNA12, DNA13, DNA14, DNA15, DNA16, DNA17, DNA18, DNA19, DNA20, DNA21, DNA22, DNA23, DNA24, DNA25 ]
# Print one motif per line: the separator must be the newline escape,
# not the letter "n".
print('\n'.join(GreedyMotifSearch(Dna,k,t)))