VCF¶
VCF to PED-MAP¶
# Pass the input file name including its extension, e.g. input_file.vcf.
# output_file is the output name passed to --out (given here without an extension).
./plink --vcf input_file.vcf --recode --out output_file
VCF to RAW¶
# Pass the input file name including its extension, e.g. input_file.vcf.
# output_file is the output name passed to --out (given here without an extension).
# NOTE(review): current PLINK 1.9 documentation spells this flag `--recode A`;
# confirm that the legacy `--recodeA` form is accepted by the plink build in use.
./plink --vcf input_file.vcf --recodeA --out output_file
VCF to BED-BIM-FAM¶
# Pass the input file name including its extension, e.g. input_file.vcf.
# Generates three files: output_file.bed, output_file.bim, and output_file.fam.
./plink --vcf input_file.vcf --make-bed --out output_file
VCF to GEN-SAMPLE¶
# Pass the input file name including its extension, e.g. input_file.vcf.
# output_file is the output name passed to --out (given here without an extension).
# NOTE(review): `--export oxford` looks like PLINK 2 syntax, while the other
# commands on this page use PLINK 1.9 flags (where this would be
# `--recode oxford`) -- confirm which binary ./plink refers to.
./plink --vcf input_file.vcf --export oxford --out output_file
VCF to 23andme¶
# Convert a multi-sample VCF into one 23andme-format file per person.
# input_file must NOT include the ".vcf" extension; output_file is the prefix
# used for the intermediate BED/BIM/FAM files. Both names (and the os module)
# are expected to be defined before this snippet runs.
import subprocess  # commands are run as argument lists, so paths need no shell quoting

# Make a directory in which 23andme files will be saved.
os.makedirs("23andme", exist_ok=True)

# Convert VCF to BED_BIM_FAM --> VCFtoBED_BIM_FAM()
# It will generate three files: output_file.bed, output_file.fam, and output_file.bim
subprocess.run(
    ["./plink", "--vcf", input_file + ".vcf", "--make-bed", "--out", output_file],
    check=True,
)

# Extract the id of each person (one id per line) from the VCF itself.
with open("./23andme/temp_samples.txt", "w") as samples_out:
    subprocess.run(
        ["bcftools", "query", "-l", input_file + ".vcf"],
        stdout=samples_out,
        check=True,
    )

# Read the ids back, ignoring blank lines.
with open("./23andme/temp_samples.txt", "r") as samples_in:
    sample_ids = [line.strip() for line in samples_in if line.strip()]

for sample_id in sample_ids:
    # The --keep file needs "FID IID". An id written as "FID_IID" is split on
    # the underscore; an id without an underscore is used for both fields
    # instead of raising IndexError.
    id_parts = sample_id.split("_")
    family_id = id_parts[0]
    within_family_id = id_parts[1] if len(id_parts) > 1 else id_parts[0]

    # Write this person's name to the --keep file.
    with open("./23andme/temp.txt", "w") as keep_file:
        keep_file.write(family_id + " " + within_family_id)

    # Extract this person from the BED/BIM/FAM files written above and convert
    # the result to 23andme format.
    result = subprocess.run(
        [
            "./plink",
            "--bfile", output_file,
            "--keep", "./23andme/temp.txt",
            "--recode", "23",
            "--snps-only",
            "--out", "./23andme/" + sample_id,
        ]
    )
    if result.returncode != 0:
        # Keep going so one failing sample does not block the remaining ones.
        print("plink failed for sample " + sample_id + " (exit code " + str(result.returncode) + ")")
VCF to AncestryDNA¶
# Convert every per-person 23andme file in ./23andme into an AncestryDNA-style
# table in ./AncestryDNA. The os module and pandas (as pd) are expected to be
# imported before this snippet runs.
# Input file should not include extension
# Convert VCF to 23andme --> VCFto23andme()
# Convert 23andme to AncestryDNA --> 23andmetoAncestryDNA()

# Make a directory in which AncestryDNA files will be saved.
os.makedirs("AncestryDNA", exist_ok=True)

# Run the VCF-to-23andme conversion first: VCFto_23andme(input_file)

# Scratch files left in ./23andme by the VCF-to-23andme step; not genotype data.
scratch_files = {"temp.txt", "temp_samples.txt"}

# Numeric chromosome codes and the labels written to the output. Keys are
# strings because the column is normalised to str below, which makes the
# mapping work whether pandas parsed the column as integers or as text.
chromosome_labels = {"23": "X", "24": "Y", "25": "XY", "26": "MT"}

# Read files one-by-one
for file_name in os.listdir("./23andme"):
    # 23andme files are in .txt file format
    if not file_name.endswith(".txt") or file_name in scratch_files:
        continue

    source_path = os.path.join("./23andme", file_name)
    # Check size: an empty file has no header or rows to convert.
    if os.path.getsize(source_path) == 0:
        continue

    # The first 8 lines are skipped so that the "# rsid ..." line becomes the header.
    data = pd.read_csv(source_path, sep="\t", skiprows=8)

    new = pd.DataFrame()
    new['Rsid'] = data['# rsid'].values
    new['Chromosome'] = data['chromosome'].values
    new['position'] = data['position'].values
    # Split genotype into allele1 and allele2
    new['allele1'] = data['genotype'].str[0]
    new['allele2'] = data['genotype'].str[1]
    # Change chromosome numbers
    new['Chromosome'] = new['Chromosome'].astype(str).replace(chromosome_labels)

    # NOTE(review): the DataFrame index is written as an unnamed first column
    # (pandas default); pass index=False if consumers expect only the five
    # data columns.
    new.to_csv(os.path.join("./AncestryDNA", file_name), sep="\t")
VCF to HAPS-LEGEND-SAMPLE¶
# Pass the input file name including its extension, e.g. input_file.vcf.
# -h selects HAPS-LEGEND-SAMPLE output and takes output_file as the output prefix.
bcftools convert input_file.vcf -h output_file