Using tabix with dbVar GVF and VCF FTP Files
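GVF and VCF formatted data in dbVar can be found in the dbVar FTP Manifest. You can query the files with tabix either locally, after downloading them, or remotely, by passing the file URL directly to tabix. The -h option used in the examples below tells tabix to also print the header lines. For local queries, download both the *.vcf.gz file and the corresponding *.vcf.gz.tbi index file.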
Note: dbVar does not provide support for tabix installation. Please follow the link above to the tabix software provider and follow the installation instructions for your specific system.
Command line tabix remote query:
tabix -h https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/vcf/nstd102.GRCh38.variant_region.vcf.gz 1:1-1000000
Using Python (https://pypi.python.org/pypi/pytabix)
# Query a remote dbVar VCF file with pytabix and print the matching records.
import tabix

# Open a remote dbVar vcf file.
vcfurl = "https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/vcf/nstd102.GRCh38.variant_region.vcf.gz"
tb = tabix.open(vcfurl)

# Query chr1 from 1 to 1000000 (equivalent to the region string 1:1-1000000).
records = tb.query("1", 1, 1000000)

# Iterate over each record and print its columns separated by tabs.
for r in records:
    print("\t".join(r))
Output
1 | 14874 | nsv3909879 | G | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=297968;CIPOS=.,0;CIEND=0,.;DESC=NCBI36%2Fhg18%201p36.33%28chr1%3A4737-338603%29x3%20AND%20See%20cases |
1 | 14874 | nsv3913374 | G | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=8785327;CIPOS=.,0;CIEND=0,0;DESC=NCBI36%2Fhg18%201p36.33-36.23%28chr1%3A4737-8734404%29x1%20AND%20See%20cases |
1 | 14874 | nsv3924689 | G | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=7467242;CIPOS=.,0;CIEND=0,0;DESC=NCBI36%2Fhg18%201p36.33-36.23%28chr1%3A4737-7424898%29x1%20AND%20See%20cases |
1 | 16302 | nsv3902961 | G | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=166909;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201q21.1%28chr1%3A143134063-143284670%29x3%20AND%20See%20cases |
1 | 19225 | nsv3881642 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=4341631;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33-36.32%28chr1%3A19225-4401691%29x3%20AND%20See%20cases |
1 | 47851 | nsv3877322 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=6599812;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33-36.31%28chr1%3A47851-6659872%29x1%20AND%20not%20provided |
1 | 47851 | nsv3877365 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=248934250;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33-q44%28chr1%3A47851-249228449%29x3%20AND%20not%20provided |
1 | 47851 | nsv3878985 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=1229930;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33%28chr1%3A47851-1165310%29x1%20AND%20not%20provided |
1 | 47851 | nsv3887654 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=778658;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33%28chr1%3A47851-714038%29x3%20AND%20not%20provided |
1 | 47851 | nsv3889387 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=91538;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33%28chr1%3A47851-91538%29x1%20AND%20not%20provided |
1 | 80386 | nsv3878412 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=91719;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33%28chr1%3A80386-91719%29x1%20AND%20not%20provided |
Using Perl (https://vcftools.github.io/perl_module.html)
#!/opt/perl/5.16.3/bin/perl
# Query a region of a remote dbVar VCF file with the VCFtools Vcf module
# and print every non-header line to standard output.
use strict;
use warnings;
use Vcf;

# Specify a remote dbVar vcf file
my $vcf_base_url = "https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/vcf/";
my $vcf_file     = "nstd102.GRCh38.variant_region.vcf.gz";

# Concatenate outside the quotes: writing "$vcf_base_url . $vcf_file" would
# embed a literal " . " in the URL and produce an invalid address.
my $vcfurl = $vcf_base_url . $vcf_file;

# query chr1 from 1 to 1000000
my $vcf = Vcf->new(file => $vcfurl, region => '1:1-1000000');

# iterate over each record ignoring header comment lines and print
while (my $line = $vcf->next_line()) {
    next if $line =~ m/^#/;
    print $line;
}
|
Output
1 | 14874 | nsv3909879 | G | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=297968;CIPOS=.,0;CIEND=0,.;DESC=NCBI36%2Fhg18%201p36.33%28chr1%3A4737-338603%29x3%20AND%20See%20cases |
1 | 14874 | nsv3913374 | G | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=8785327;CIPOS=.,0;CIEND=0,0;DESC=NCBI36%2Fhg18%201p36.33-36.23%28chr1%3A4737-8734404%29x1%20AND%20See%20cases |
1 | 14874 | nsv3924689 | G | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=7467242;CIPOS=.,0;CIEND=0,0;DESC=NCBI36%2Fhg18%201p36.33-36.23%28chr1%3A4737-7424898%29x1%20AND%20See%20cases |
1 | 16302 | nsv3902961 | G | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=166909;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201q21.1%28chr1%3A143134063-143284670%29x3%20AND%20See%20cases |
1 | 19225 | nsv3881642 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=4341631;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33-36.32%28chr1%3A19225-4401691%29x3%20AND%20See%20cases |
1 | 47851 | nsv3877322 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=6599812;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33-36.31%28chr1%3A47851-6659872%29x1%20AND%20not%20provided |
1 | 47851 | nsv3877365 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=248934250;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33-q44%28chr1%3A47851-249228449%29x3%20AND%20not%20provided |
1 | 47851 | nsv3878985 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=1229930;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33%28chr1%3A47851-1165310%29x1%20AND%20not%20provided |
1 | 47851 | nsv3887654 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=778658;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33%28chr1%3A47851-714038%29x3%20AND%20not%20provided |
1 | 47851 | nsv3889387 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=91538;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33%28chr1%3A47851-91538%29x1%20AND%20not%20provided |
1 | 80386 | nsv3878412 | C | <CNV> | . | . | DBVARID;SVTYPE=CNV;IMPRECISE;END=91719;CIPOS=.,0;CIEND=0,.;DESC=GRCh37%2Fhg19%201p36.33%28chr1%3A80386-91719%29x1%20AND%20not%20provided |