RAW

RAW to PED-MAP

def converter(x):
    """Encode one RAW genotype column as PED allele pairs.

    The last character of the column name (``x.name``) selects the encoding.
    For a name ending in G, C, T or A the counts 0, 1 and 2 are replaced by
    the matching allele-pair strings; the second allele used is always the
    complementary base (G<->C, A<->T). Missing values become ``'0 0'``.

    Args:
        x: a pandas Series whose ``name`` is a string ending in the allele
            letter (columns are named SNP_REFAllele).

    Returns:
        A new Series with missing values filled and, when the name ends in a
        known allele letter, the counts 0/1/2 replaced by genotype strings.
        For any other trailing character only the missing values are filled.
    """
    # Genotype strings for the counts 0, 1 and 2, keyed by the allele letter
    # that ends the column name.
    encodings = {
        "G": ("G G", "G C", "C C"),
        "C": ("C C", "C G", "G G"),
        "T": ("T T", "T A", "A A"),
        "A": ("A A", "A T", "T T"),
    }

    # Fill "NA" with '0 0'
    x = x.fillna('0 0')

    genotypes = encodings.get(x.name[-1])
    if genotypes is None:
        return x

    # One replace call handles all three counts; the replacement strings can
    # never collide with the numeric keys.
    return x.replace({count: pair for count, pair in enumerate(genotypes)})


# Extract the SNP names, which are in the format SNP_REFAllele.
# NOTE: the shell command is only printed here, not executed. snps.txt must
# exist (e.g. by running the printed command) before it is read below.
print("cat "+input_file+" | head -n 1  >> snps.txt")

# Keep header fields 6 onwards and transpose them, so that column 0 of `data`
# holds one SNP name per row. The first six fields are skipped (presumably the
# PLINK sample columns FID, IID, PAT, MAT, SEX, PHENOTYPE -- confirm).
data = pd.read_csv("snps.txt", index_col=None, header=None, sep=r"\s+").loc[:, 6:].T

# Make a directory to store chunks.
# Chunking is required because a RAW file is usually large.
os.makedirs("Chunks", exist_ok=True)

# Make the ".map" file.
# The RAW file does not contain position or chromosome information, so every
# column except the 2nd (the SNP name) is 0.
zeros = [0] * len(data)
maps = pd.DataFrame({0: zeros, 1: data[0].values, 2: zeros, 3: zeros})
maps.to_csv("final.map", sep="\t", header=False, index=False)

_smallraw = sorted(os.listdir('./Chunks'))
count = 0
snp_columns = list(data[0].values)

# Encode each chunk the same way as a PED file and write it next to the
# chunk as "<chunk name>.txt" (tab-separated, no header).
for files in _smallraw:
    # Names containing ".txt" are outputs of this step, not raw chunks.
    if ".txt" in files:
        continue

    chunk_path = "Chunks" + os.sep + files
    if count == 0:
        # The first chunk (in sorted order) is read with its header line;
        # its column names are reused for every later chunk.
        count = 1
        data2 = pd.read_csv(chunk_path, sep=r"\s+")
    else:
        data2 = pd.read_csv(chunk_path, sep=r"\s+", names=list(data2.columns.values))

    data2[snp_columns] = data2[snp_columns].apply(converter)
    data2.to_csv(chunk_path + ".txt", sep="\t", index=False, header=False)
# Merge all encoded chunks into final.ped.
# The directory is listed again here so that the ".txt" files written by the
# encoding step are actually picked up; sorting keeps the chunk order.
encoded_chunks = sorted(name for name in os.listdir("Chunks") if ".txt" in name)
frames = [
    pd.read_csv("Chunks" + os.sep + name, sep="\t", header=None, index_col=None, low_memory=False)
    for name in encoded_chunks
]
# pd.concat raises on an empty list, so fall back to an empty frame.
final = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
del frames
final.to_csv("final.ped", sep="\t", index=False, header=False)

# After this step you will have two files: final.ped and final.map

RAW to VCF

#Step 1. Convert the RAW file to PED-MAP. See RAWtoPED-MAP.
This generates two files: final.ped and final.map
#Step 2. Convert the PED-MAP files to VCF.
./plink --file final --recode vcf --out output_file

RAW to BED-BIM-FAM

#Step 1. Convert the RAW file to PED-MAP. See RAWtoPED-MAP.
This generates two files: final.ped and final.map
#Step 2. Convert the PED-MAP files to BED-BIM-FAM.
./plink --file final  --make-bed --out output_file

RAW to GEN-SAMPLE

#Step 1. Convert the RAW file to PED-MAP. See RAWtoPED-MAP.
This generates two files: final.ped and final.map
#Step 2. Convert the PED-MAP files to GEN-SAMPLE (Oxford format).
./plink --file final  --export oxford --out output_file

RAW to 23andme

#Step 1. Convert Raw file to PED-MAP. See RAWtoPED-MAP.
It generates two files: final.ped and final.map

#Step 2. Convert PED-MAP to BED-BIM-FAM  --> PED-MAPtoBED-BIM-FAM()
./plink --file input_file --make-bed --out output_file

#Step 3. Convert BED-BIM-FAM to 23andme --> BED-BIM-FAMto23andme()
See BED-BIM-FAMto23andme

RAW to AncestryDNA

#Step 1. Convert Raw file to PED-MAP. See RAWtoPED-MAP.
It generates two files: final.ped and final.map

#Step 2. Convert PED-MAP to BED-BIM-FAM  --> PED-MAPtoBED-BIM-FAM()
./plink --file input_file --make-bed --out output_file

#Step 3. Convert BED-BIM-FAM to 23andme --> BED-BIM-FAMto23andme()
See BED-BIM-FAMto23andme

#Step 4. Convert 23andme to AncestryDNA --> 23andmetoAncestryDNA()
See 23andmetoAncestryDNA

RAW to HAPS-LEGEND-SAMPLE

#Step 1. Convert Raw file to PED-MAP. See RAWtoPED-MAP.
It generates two files: final.ped and final.map
#Step 2. Convert PED-MAP file to VCF.
./plink --file  final --recode vcf --out output_file
#Step 3. Convert VCF file to HAPS-LEGEND-SAMPLE.
bcftools convert output_file.vcf  -h  output_file2