root / Am1.0 / script / gene / make_glean.py @ ed2c5f6e
History | View | Annotate | Download (1.5 kB)
"""Build the Glean gene track files from the Glean GFF annotation.

Reads ``snapdragon.scafseq.fa.Glean.filter.final.gff`` and produces:

* ``Glean``       - one BED-like line per mRNA, which is then sorted,
                    compressed with bgzip and indexed with tabix
                    (leaving ``Glean.gz`` and its index on disk);
* ``Gleanstruct`` - one tab-separated row per mRNA describing its child
                    features, in the column order of the ``Gleanstruct``
                    table created by the SQL printed at the end.

The SQL needed to create and load the tables is printed to stdout.
"""
import os

GFF_PATH = 'snapdragon.scafseq.fa.Glean.filter.final.gff'
BED_PATH = 'Glean'
STRUCT_PATH = 'Gleanstruct'

# Shell steps that turn the raw BED file into a tabix-indexed track.
# ``-f`` lets a re-run overwrite Glean.gz and its index instead of stopping
# on the files left behind by the previous run.
INDEX_COMMANDS = (
    'sort -k1,1 -k2,2n Glean > xx',
    'mv xx Glean',
    'bgzip -f Glean',
    'tabix -f -p bed Glean.gz',
)

# The ``load data`` statement must name the file written to STRUCT_PATH.
SCHEMA_SQL = '''
drop table if exists Gleanstruct;
create table Gleanstruct (
id int unsigned not null primary key,
chrom varchar(255) not null,
strand char(1) not null,
txStart int unsigned not null,
txEnd int unsigned not null,
cdsStart int unsigned not null,
cdsEnd int unsigned not null,
exonCount int unsigned not null,
exonStarts text not null,
exonEnds text not null,
name varchar(255) not null
);
load data local infile 'Gleanstruct' into table Gleanstruct;

drop table if exists Gleansymbol;
create table Gleansymbol (
name varchar(255) not null,
symbol varchar(255) null,
description text null,
id int unsigned not null primary key,
index(name)
);

'''


def parse_gff(lines):
    """Group GFF feature lines by transcript name.

    ``lines`` is any iterable of GFF text lines.  Blank lines and ``#``
    comment lines are skipped.  The transcript name is the value of the
    first ``key=value`` pair in column 9 (the mRNA's own identifier on an
    mRNA line, and presumably the parent identifier on child lines).

    Returns a dict mapping name -> record, where a record is a dict with
    keys ``chrom``, ``start``, ``end``, ``strand``, ``starts`` and ``ends``.
    Start coordinates are stored minus one (0-based), ends unchanged.

    Raises KeyError when a child feature appears before its mRNA line.
    """
    genes = {}
    for lineno, raw in enumerate(lines, 1):
        line = raw.rstrip()
        if not line or line.startswith('#'):
            continue
        fields = line.split('\t')
        name = fields[8].split('=')[1].split(';')[0]
        start = int(fields[3]) - 1
        end = int(fields[4])
        if fields[2] == 'mRNA':
            genes[name] = {
                'chrom': fields[0],
                'start': start,
                'end': end,
                'strand': fields[6],
                'starts': [],
                'ends': [],
            }
            continue
        if name not in genes:
            raise KeyError(
                'line {0}: {1} feature refers to {2!r} before its mRNA line'
                .format(lineno, fields[2], name))
        genes[name]['starts'].append(start)
        genes[name]['ends'].append(end)
    return genes


def bed_line(name, gene_id, rec):
    """Return the ``Glean`` line: chrom, start, end, name, id, strand."""
    return '{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n'.format(
        rec['chrom'], rec['start'], rec['end'], name, gene_id, rec['strand'])


def struct_line(name, gene_id, rec):
    """Return the ``Gleanstruct`` row for one transcript.

    Columns follow the Gleanstruct table: id, chrom, strand, txStart, txEnd,
    cdsStart, cdsEnd, exonCount, exonStarts, exonEnds, name.  cdsStart and
    cdsEnd are the smallest start and largest end of the child features.

    Raises ValueError when the transcript has no child features, because
    the cds/exon columns cannot be filled in.
    """
    starts = rec['starts']
    ends = rec['ends']
    if not starts:
        raise ValueError(
            'mRNA {0!r} has no child features; cannot build its '
            'Gleanstruct row'.format(name))
    columns = [
        gene_id, rec['chrom'], rec['strand'], rec['start'], rec['end'],
        min(starts), max(ends), len(starts),
        ','.join(str(pos) for pos in starts),
        ','.join(str(pos) for pos in ends),
        name,
    ]
    return '\t'.join(str(col) for col in columns) + '\n'


def write_tracks(genes, bed_path=BED_PATH, struct_path=STRUCT_PATH):
    """Write both output files, numbering transcripts from 1.

    The same id is used for a transcript in both files.
    """
    with open(bed_path, 'w') as bed_out:
        with open(struct_path, 'w') as struct_out:
            for gene_id, name in enumerate(genes, 1):
                rec = genes[name]
                bed_out.write(bed_line(name, gene_id, rec))
                struct_out.write(struct_line(name, gene_id, rec))


def run_shell(command):
    """Run a shell command and raise RuntimeError if it exits non-zero."""
    status = os.system(command)
    if status != 0:
        raise RuntimeError(
            'command failed (status {0}): {1}'.format(status, command))


def index_bed():
    """Sort, bgzip-compress and tabix-index the ``Glean`` file.

    Stops at the first failing step so that a failed sort can no longer be
    followed by ``mv`` replacing the BED file with an incomplete one.
    """
    for command in INDEX_COMMANDS:
        run_shell(command)


def main():
    """Convert the GFF file, index the track and print the table SQL."""
    with open(GFF_PATH) as fin:
        genes = parse_gff(fin)
    write_tracks(genes)
    index_bed()
    print(SCHEMA_SQL)


if __name__ == '__main__':
    main()