VCF

VCF to PED-MAP

# Convert a VCF file to PED/MAP format with PLINK.
# Pass the input file name including its extension, for example input_file.vcf.
# output_file is the name given to --out for the generated files.
./plink --vcf input_file.vcf --recode --out output_file

VCF to RAW

# Convert a VCF file to RAW format with PLINK.
# Pass the input file name including its extension, for example input_file.vcf.
# output_file is the name given to --out for the generated files.
./plink --vcf input_file.vcf --recodeA --out output_file

VCF to BED-BIM-FAM

# Convert a VCF file to binary BED/BIM/FAM format with PLINK.
# Pass the input file name including its extension, for example input_file.vcf.
# output_file is the name given to --out for the generated files.
./plink --vcf input_file.vcf --make-bed --out output_file

VCF to GEN-SAMPLE

# Convert a VCF file to GEN/SAMPLE (Oxford) format.
# Pass the input file name including its extension, for example input_file.vcf.
# output_file is the name given to --out for the generated files.
./plink --vcf input_file.vcf  --export oxford --out output_file

VCF to 23andme

# Convert a multi-sample VCF into one 23andme file per sample (written to ./23andme/).
# input_file is the path of the VCF. It may be given with or without its extension:
# ".vcf" is appended only when no ".vcf" / ".vcf.gz" suffix is present, so the same
# path is handed to both plink and bcftools.
if input_file.endswith((".vcf", ".vcf.gz")):
    vcf_path = input_file
else:
    vcf_path = input_file + ".vcf"

# Prefix of the intermediate BED/BIM/FAM files: the VCF path minus its extension.
# The per-sample extraction below must read exactly the files written by --make-bed,
# so both steps use this one prefix.
if vcf_path.endswith(".vcf.gz"):
    bfile_prefix = vcf_path[:-len(".vcf.gz")]
else:
    bfile_prefix = vcf_path[:-len(".vcf")]

# Make a directory in which 23andme files will be saved.
os.makedirs("23andme", exist_ok=True)

# Convert VCF to BED_BIM_FAM --> VCFtoBED_BIM_FAM()
# It will generate three files: <prefix>.bed, <prefix>.fam, and <prefix>.bim
os.system("./plink --vcf " + vcf_path + " --make-bed --out " + bfile_prefix)

# Extract the id of each person (one sample id per line).
os.system("bcftools query -l " + vcf_path + " > ./23andme/temp_samples.txt")

# Process the samples one by one.
with open("./23andme/temp_samples.txt", "r") as sample_list:
    for line in sample_list:
        sample_id = line.strip('\n')
        if not sample_id:
            # Skip blank lines instead of running plink with an empty sample name.
            continue

        # The --keep file needs "FID IID". Sample ids of the form FID_IID are split
        # on "_"; ids without "_" use the id for both fields (this mirrors how PLINK
        # 1.9 assigns FID/IID when loading a VCF by default -- confirm if --double-id,
        # --const-fid or --id-delim is used for the import).
        id_parts = sample_id.split("_")
        if len(id_parts) > 1:
            family_id, individual_id = id_parts[0], id_parts[1]
        else:
            family_id = individual_id = sample_id

        # Write this person's ids to the keep-file (overwritten for every sample).
        with open("./23andme/temp.txt", "w") as keep_file:
            keep_file.write(family_id + "  " + individual_id)

        # Extract each person from the BED, BIM, FAM files and convert it to 23andme.
        os.system("./plink --bfile " + bfile_prefix + " --keep ./23andme/temp.txt --recode 23 --snps-only --out ./23andme/" + sample_id)

VCF to AncestryDNA

# Input file should not include extension
# Step 1: convert VCF to 23andme --> VCFto23andme(), e.g. VCFto_23andme(input_file).
# Step 2 (below): convert 23andme to AncestryDNA --> 23andmetoAncestryDNA()

# Make a directory in which AncestryDNA files will be saved.
os.makedirs("AncestryDNA", exist_ok=True)

# Numeric chromosome codes that are rewritten to letter labels in the output.
chromosome_labels = {23: 'X', 24: 'Y', 25: 'XY', 26: 'MT'}

# Read the 23andme files one by one.
for file_name in os.listdir('./23andme'):
    # 23andme files are in .txt file format; names containing "temp" are scratch files.
    if not file_name.endswith(".txt") or "temp" in file_name:
        continue

    source_path = os.path.join("./23andme", file_name)

    # Skip empty files: there is nothing to convert.
    if os.stat(source_path).st_size == 0:
        continue

    # The first 8 lines are skipped; the next line is used as the header and is
    # expected to provide the '# rsid', 'chromosome', 'position' and 'genotype' columns.
    data = pd.read_csv(source_path, sep="\t", skiprows=8)

    new = pd.DataFrame({
        'Rsid': data['# rsid'].values,
        # Change chromosome numbers to their letter labels.
        'Chromosome': data['chromosome'].replace(chromosome_labels).values,
        'position': data['position'].values,
        # Split genotype into allele1 and allele2 (first and second character).
        'allele1': data['genotype'].str[0],
        'allele2': data['genotype'].str[1],
    })

    # NOTE(review): the DataFrame index is written as an unnamed first column
    # (no index=False) -- confirm that downstream consumers expect it.
    new.to_csv(os.path.join("./AncestryDNA", file_name), sep="\t")

VCF to HAPS-LEGEND-SAMPLE

# Convert a VCF file to HAPS/LEGEND/SAMPLE format with bcftools.
# Pass the input file name including its extension, for example input_file.vcf.
# output_file is the output name handed to -h (presumably used as the prefix of
# the generated files -- confirm against the bcftools convert documentation).
bcftools convert input_file.vcf  -h  output_file