################################################################
## Analyze the intersections between origins and other features
## (chromatin marks, ...). 

#include ${RSAT}/makefiles/util.mk
include scripts/makefiles/01_peak_calling.mk
## Path of this makefile.
## NOTE(review): the include above reads from scripts/makefiles/ whereas this
## path starts at makefiles/ -- confirm which location is the intended one.
MAKEFILE=makefiles/06_marks_vs_oris_and_others.mk

## Directory holding the reference BED files (marks, ...) and the text file
## that lists them. No trailing blank after the file name: make would keep it
## as part of the value.
DIR_BED_FILES=data/marks_BED
BED_DESCR_FILE=${DIR_BED_FILES}/bed_file_description.txt

## Shell pipeline shared by the three OTHER_FILES* lists below. It drops the
## lines of ${BED_DESCR_FILE} starting with ';' or '#', keeps the first field
## of each remaining line, joins the names on one line, and prefixes every
## name except the first one with ${DIR_BED_FILES}/ (the first name gets its
## prefix from the final awk step of each list).
OTHER_FILES_LIST_CMD=grep -v '^;' ${BED_DESCR_FILE} | grep -v '^\#' | awk '$$1 != "" {print $$1}' | xargs | perl -pe 's| +| ${DIR_BED_FILES}\/|g'

## The values below are backquoted commands: they are not run by make but by
## the shell, each time a recipe line referencing them is executed.
##   OTHER_FILES           paths of the listed files
##   OTHER_FILES_BASENAME  same paths with ".bed" removed
##   OTHER_FILES_SORTED    same paths with ".bed" replaced by "_bypos.bed"
OTHER_FILES=`${OTHER_FILES_LIST_CMD} | awk '{print "${DIR_BED_FILES}/"$$0}'`
OTHER_FILES_BASENAME=`${OTHER_FILES_LIST_CMD} | perl -pe 's|\.bed||g' | awk '{print "${DIR_BED_FILES}/"$$0}'`
OTHER_FILES_SORTED=`${OTHER_FILES_LIST_CMD} | perl -pe 's|\.bed|_bypos.bed|g' | awk '{print "${DIR_BED_FILES}/"$$0}'`

## Default pair of datasets (file names without the .bed extension).
SET1=GSE41589_H3K27me3_ShRd_BindingSites
SET2=GSM1000121_mm9_Ese14H3k04me1

## Print the values of the main variables used by the targets of this
## makefile. The OTHER_FILES* variables hold backquoted pipelines, so the
## shell runs them when the corresponding echo is executed.
## NOTE(review): the first message announces a sort although this target only
## prints values -- confirm whether the wording is intended.
## Declared phony so that a file named list_param_06 cannot mask the target.
.PHONY: list_param_06
list_param_06:
	@echo "Sorting bed by position"
	@echo "BED_DESCR_FILE			${BED_DESCR_FILE}"
	@echo "SWEMBL_PREFIX			${SWEMBL_PREFIX}"
	@echo "SWEMBL_SUMMITS_SORTED		${SWEMBL_SUMMITS_SORTED}"
	@echo "SWEMBL_SUMMITS_FLANKS		${SWEMBL_SUMMITS_FLANKS}"
	@echo "SWEMBL_SUMMITS_FLANKS_BYPOS	${SWEMBL_SUMMITS_FLANKS_BYPOS}"
	@echo "OTHER_FILES			${OTHER_FILES}"
	@echo "OTHER_FILES_BASENAME		${OTHER_FILES_BASENAME}"
	@echo "OTHER_FILES_SORTED		${OTHER_FILES_SORTED}"


## Compare two BED files of ${DIR_BED_FILES} with compare-features.
##   FILE1, FILE2   input files, named after SET1 and SET2
##   DIR_COMPA      directory receiving the comparison results
##   SET1_VS_SET2   output path without extension (".ft" is appended)
FILE1=${SET1}.bed
FILE2=${SET2}.bed
DIR_COMPA=${DIR_BED_FILES}/comparisons
SET1_VS_SET2=${DIR_COMPA}/${SET1}__vs__${SET2}

## compare-features is run from inside ${DIR_BED_FILES}, so the input files are
## passed by bare name. The output path is made absolute with $(abspath ...):
## ${SET1_VS_SET2} is relative to the directory make was started from, and
## would otherwise be resolved against ${DIR_BED_FILES} after the cd.
## "&&" prevents the comparison from running if the cd fails.
.PHONY: compare_2_files
compare_2_files:
	@echo
	@echo "${DATE}	Comparing dataset ${SET1} vs ${SET2}"
	@mkdir -p ${DIR_COMPA}
	cd ${DIR_BED_FILES} && compare-features -v 1 -i ${FILE1} -i ${FILE2} \
		-iformat bed -oformat ft -self -return stats,inter,diff \
		-o $(abspath ${SET1_VS_SET2}.ft)
	@echo "	${SET1_VS_SET2}.ft"

## Select a fixed-width window around the peak summits (e.g. 300 bp on each
## side).
##   SUMMIT_FLANK_SIZE      number of bp added on each side of the summit
##   SWEMBL_SUMMITS_FLANKS  output path without the .bed extension
##
## Input: ${SWEMBL_SUMMITS_SORTED}.bed, lines starting with '#' are skipped.
## Output columns (tab-separated):
##   1. column 1 of the input
##   2. input column 2 minus the flank size, clamped to 0 so that summits
##      lying closer than the flank size to the start of a sequence do not
##      yield a negative BED start coordinate
##   3. input column 2 plus the flank size
##   4. identifier built as <col1>_<start>_<end>_+
##   5. column 4 of the input
## The result is then sorted through the sort_bed_file target (not defined in
## this file) into ${SWEMBL_SUMMITS_FLANKS_BYPOS}.bed.
SUMMIT_FLANK_SIZE=300
SWEMBL_SUMMITS_FLANKS=${SWEMBL_SUMMITS_SORTED}_flanks${SUMMIT_FLANK_SIZE}
.PHONY: select_summit_flanks
select_summit_flanks:
	@echo
	@echo "Selecting ${SUMMIT_FLANK_SIZE}bp-wide flanks on each site of peak summits"
	@echo "Input file (SWEMBL summits)"
	@echo "	${SWEMBL_SUMMITS_SORTED}.bed"
	@grep -v '^\#' ${SWEMBL_SUMMITS_SORTED}.bed \
		| awk -F'\t' -v flank=${SUMMIT_FLANK_SIZE} '{left = $$2 - flank; if (left < 0) left = 0; right = $$2 + flank; print $$1"\t"left"\t"right"\t"$$1"_"left"_"right"_+\t"$$4}' \
		> ${SWEMBL_SUMMITS_FLANKS}.bed
	@echo "	${SWEMBL_SUMMITS_FLANKS}.bed"
	@${MAKE} sort_bed_file TO_SORT=${SWEMBL_SUMMITS_FLANKS}.bed SORTED=${SWEMBL_SUMMITS_FLANKS_BYPOS}.bed
	@echo "	${SWEMBL_SUMMITS_FLANKS_BYPOS}.bed"

## Sort all the reference bed files (marks, ...).
## For each path of ${OTHER_FILES_BASENAME} (file paths without the .bed
## extension), call the sort_bed_file target (not defined in this file) to
## write <path>_bypos.bed from <path>.bed.
## The loop stops at the first failing sub-make: without the "|| exit 1" the
## status of the recipe would only be that of the last file, and earlier
## failures would go unnoticed.
.PHONY: sort_other_files
sort_other_files:
	@echo
	@echo "Sorting bed files	 ${OTHER_FILES_BASENAME}"
	@for file in ${OTHER_FILES_BASENAME} ; do \
		${MAKE} sort_bed_file TO_SORT=$${file}.bed SORTED=$${file}_bypos.bed || exit 1; \
	done


## Compare the flanks of the SWEMBL summits to the sorted reference bed files.
##   SWEMBL_SUMMITS_FLANKS_BYPOS  position-sorted summit flanks (path without
##                                .bed); the flank size in the name follows
##                                ${SUMMIT_FLANK_SIZE} instead of a fixed 300,
##                                so that it stays in step with
##                                select_summit_flanks
##   SWEMBL_MULTINTER             output of multiIntersectBed
##   SWEMBL_ANNOTATE_COUNTS       output of annotateBed -counts
##   SWEMBL_ANNOTATE_PERCENTS     output of annotateBed without -counts
SWEMBL_SUMMITS_FLANKS_BYPOS=${DIR_BED_FILES}/${SWEMBL_PREFIX}_summits_flanks${SUMMIT_FLANK_SIZE}_bypos
SWEMBL_MULTINTER=${SWEMBL_SUMMITS_FLANKS_BYPOS}_multiIntersectBed.tab
SWEMBL_ANNOTATE_COUNTS=${SWEMBL_SUMMITS_FLANKS_BYPOS}_annotateBed_counts.tab
SWEMBL_ANNOTATE_PERCENTS=${SWEMBL_SUMMITS_FLANKS_BYPOS}_annotateBed_percents.tab

## Each output file starts with a hand-written header line, in which the
## ${DIR_BED_FILES}/ prefix and the .bed extension are stripped from the file
## names; the tool output is appended after it.
## NOTE(review): the multiIntersectBed header only lists OTHER_FILES_SORTED,
## whereas the command also receives the summit flank file and is run with
## -header -- confirm that the header columns line up with the data columns.
##
## Disabled alternative, kept for reference:
#	compare-features -v ${V} -ref ${SWEMBL_SUMMITS_FLANKS}.bed -filelist ${BED_DESCR_FILE} \
#		-iformat bed -oformat ft  -return stats -o ${SWEMBL_MULTINTER}
.PHONY: swembl_summits_vs_others
swembl_summits_vs_others:
	@echo
	@echo "Comparing SWEMBL summits to other bed files"
	@echo "OTHER_FILES_SORTED	'${OTHER_FILES_SORTED}'"
	@echo "Running multiIntersectBed"
	@echo "#chrom	start	end	num	list	${OTHER_FILES_SORTED}"| perl -pe 's|${DIR_BED_FILES}/||g' | perl -pe 's|\.bed||g' > ${SWEMBL_MULTINTER}
	multiIntersectBed -header -i  ${SWEMBL_SUMMITS_FLANKS_BYPOS}.bed  ${OTHER_FILES_SORTED} >> ${SWEMBL_MULTINTER}
	@echo "	${SWEMBL_MULTINTER}"
	@echo "Running annotateBed to count intersections"
	@echo "chr	start_pos	end__pos	identifier	identifier_bis	${OTHER_FILES_SORTED}"| perl -pe 's|${DIR_BED_FILES}/||g' | perl -pe 's|\.bed||g' > ${SWEMBL_ANNOTATE_COUNTS}
	annotateBed -i  ${SWEMBL_SUMMITS_FLANKS_BYPOS}.bed -counts -files  ${OTHER_FILES_SORTED} >> ${SWEMBL_ANNOTATE_COUNTS}
	@echo "	${SWEMBL_ANNOTATE_COUNTS}"
	@echo "Running annotateBed to measure percents of intersections"
	@echo "chr	start_pos	end__pos	identifier	identifier_bis	${OTHER_FILES_SORTED}"| perl -pe 's|${DIR_BED_FILES}/||g' | perl -pe 's|\.bed||g' > ${SWEMBL_ANNOTATE_PERCENTS}
	annotateBed -i  ${SWEMBL_SUMMITS_FLANKS_BYPOS}.bed -files  ${OTHER_FILES_SORTED} >> ${SWEMBL_ANNOTATE_PERCENTS}
	@echo "	${SWEMBL_ANNOTATE_PERCENTS}"

