Mercurial > hg > extraction-interface
comparison js/taggingtext.js @ 89:e681d693240e extractapp
new: generated regex to SmartRegex
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 03 Jun 2015 16:54:09 +0200 |
parents | fb5049fc5dd7 |
children | a05491461199 |
comparison
equal
deleted
inserted
replaced
88:61593b047289 | 89:e681d693240e |
---|---|
576 }); | 576 }); |
577 | 577 |
578 function genRegexWindowOpen(){ | 578 function genRegexWindowOpen(){ |
579 var btn_state = $('#regex_generator').css('display'); | 579 var btn_state = $('#regex_generator').css('display'); |
580 if (btn_state == "block") { | 580 if (btn_state == "block") { |
581 $("#regex_generator").css("display", "none"); | 581 genRegexWindowClose(); |
582 $("#gen_regex_window_open_id").text("Open Gen Regex"); | |
583 } else { | 582 } else { |
584 $('#regex_generator').css("display", "block"); | 583 $('#regex_generator').css("display", "block"); |
585 $("#gen_regex_window_open_id").text("Close Gen Regex"); | 584 $("#gen_regex_window_open_id").text("Close Gen Regex"); |
586 } | 585 } |
587 } | 586 } |
588 function genRegexWindowClose(){ | 587 function genRegexWindowClose(){ |
589 $('#regex_generator').css("display", "none"); | 588 $('#regex_generator').css("display", "none"); |
589 $("#gen_regex_window_open_id").text("Open Gen Regex"); | |
590 } | 590 } |
591 | 591 |
592 function sharedStart_(array){ | 592 function sharedStart_(array){ |
593 var A= array.concat().sort(), | 593 var A= array.concat().sort(), |
594 a1= A[0], a2= A[A.length-1], L= a1.length, i= 0; | 594 a1= A[0], a2= A[A.length-1], L= a1.length, i= 0; |
595 while(i<L && a1.charAt(i)=== a2.charAt(i)) i++; | 595 while(i<L && a1.charAt(i)=== a2.charAt(i)) i++; |
596 return a1.substring(0, i); | 596 return a1.substring(0, i); |
597 } | |
598 | |
599 function getTagNameByTag(tag){ | |
600 var name = ""; | |
601 // taglistArray is a global variable | |
602 for (var i = 0; i < taglistArray.length; i++) { | |
603 var taglistValue = taglistArray[i]; | |
604 | |
605 var _tag = taglistValue[2]; | |
606 var _name = taglistValue[1]; | |
607 if (_tag == tag) { | |
608 name = _name; | |
609 } | |
610 } | |
611 | |
612 return name; | |
613 } | |
614 | |
615 function genRegexAddToSmartRegex() { | |
616 // append blocks of generated regex to smart regex | |
617 | |
618 smartRegexEmpty(); // clear | |
619 | |
620 var reg_obj = getSuggestedRegex(); | |
621 console.log(reg_obj); | |
622 | |
623 for (var i = 0; i < reg_obj.length; i++) { | |
624 if (reg_obj[i].txt != "") { | |
625 | |
626 var newdiv = document.createElement("span"); | |
627 | |
628 $(newdiv).css("border", "1px solid black"); | |
629 $(newdiv).css("width", "100px"); | |
630 | |
631 if (reg_obj[i].tag == null) { | |
632 if (reg_obj[i].txt == "○") { | |
633 $(newdiv).text("空白"); | |
634 } else { | |
635 $(newdiv).text(reg_obj[i].txt); | |
636 } | |
637 $(newdiv).attr("class", "span_NOTAG"); | |
638 $(newdiv).attr("regexReplace","NOTAG"); | |
639 } else { | |
640 | |
641 var name = getTagNameByTag(reg_obj[i].tag); | |
642 $(newdiv).text(name+"名"); | |
643 | |
644 $(newdiv).attr("class", "span_"+reg_obj[i].tag); | |
645 $(newdiv).attr("regexReplace", reg_obj[i].tag); | |
646 | |
647 } | |
648 | |
649 regex_element_index += 1; | |
650 $(newdiv).attr("id", "regex_elem_"+regex_element_index); | |
651 | |
652 $(newdiv).attr("regexText", reg_obj[i].txt); | |
653 | |
654 $('#smartRegexShowDiv').append(newdiv); | |
655 | |
656 } | |
657 } | |
658 | |
597 } | 659 } |
598 | 660 |
599 function longestCommonSubstring_(string1, string2){ | 661 function longestCommonSubstring_(string1, string2){ |
600 // init max value | 662 // init max value |
601 var longestCommonSubstring = 0; | 663 var longestCommonSubstring = 0; |
656 } | 718 } |
657 return s1.substring(start_idx, (start_idx + max_len)); | 719 return s1.substring(start_idx, (start_idx + max_len)); |
658 | 720 |
659 } | 721 } |
660 | 722 |
661 function getRegex(_pattern) { | 723 var suggestedRegex = []; |
724 | |
725 function setSuggestedRegex(_pattern) { | |
726 /* | |
662 console.log(_pattern[0]); | 727 console.log(_pattern[0]); |
663 console.log(_pattern[1]); | 728 console.log(_pattern[1]); |
729 */ | |
664 var p0 = _pattern[0]; | 730 var p0 = _pattern[0]; |
665 var p1 = _pattern[1]; | 731 var p1 = _pattern[1]; |
666 | 732 |
667 // TODO: find common pattern | 733 // TODO: find common pattern |
668 var reg_str = ""; | 734 suggestedRegex = []; // it's a global variable |
669 // _p1 = 測試 | 735 |
670 // _p2 = 測<tag_name>試</tag_name>一下 | |
671 var combined = []; | |
672 if (p0.length > p1.length) { | 736 if (p0.length > p1.length) { |
673 combined = p0; | 737 suggestedRegex = p0; |
674 } else if(p0.length < p1.length) { | 738 } else if(p0.length < p1.length) { |
675 combined = p1; | 739 suggestedRegex = p1; |
676 } else { // equal length | 740 } else { // equal length |
677 // find matching string | 741 // find matching string |
678 var cnt = p0.length; | 742 var cnt = p0.length; |
679 for (var i = 0; i < cnt; i++) { | 743 for (var i = 0; i < cnt; i++) { |
680 if (p1[i].tag != null) { | 744 if (p1[i].tag != null) { |
681 combined.push({tag:p1[i].tag, txt:"[^○如即而之有<>〈〉【】]{1,"+p1[i].txt.length+"}"}); | 745 suggestedRegex.push({tag:p1[i].tag, txt:"[^○如即而之有<>〈〉【】]{1,"+p1[i].txt.length+"}"}); |
682 } else if (p0[i].tag != null) { | 746 } else if (p0[i].tag != null) { |
683 combined.push({tag:p0[i].tag, txt:"[^○如即而之有<>〈〉【】]{1,"+p0[i].txt.length+"}"}); | 747 suggestedRegex.push({tag:p0[i].tag, txt:"[^○如即而之有<>〈〉【】]{1,"+p0[i].txt.length+"}"}); |
684 } else { | 748 } else { |
685 // find matching for text in each corresponding position | 749 // find matching for text in each corresponding position |
686 var texts = [p0[i].txt, p1[i].txt]; | 750 var texts = [p0[i].txt, p1[i].txt]; |
687 var common = longestCommonSubstring(p0[i].txt, p1[i].txt); | 751 var common = longestCommonSubstring(p0[i].txt, p1[i].txt); |
688 | 752 |
692 common[i]; | 756 common[i]; |
693 reg_for_common += common[i]+"|"; | 757 reg_for_common += common[i]+"|"; |
694 }; | 758 }; |
695 reg_for_common += "]"; | 759 reg_for_common += "]"; |
696 */ | 760 */ |
697 combined.push({tag:null, txt:common}); | 761 suggestedRegex.push({tag:null, txt:common}); |
698 } | 762 } |
699 }; | 763 }; |
700 } | 764 } |
701 | 765 |
702 for (var i = 0; i < combined.length; i++) { | 766 |
703 reg_str += combined[i].txt; | 767 } |
704 }; | 768 |
705 | 769 function getSuggestedRegex(){ |
706 return reg_str; | 770 return suggestedRegex; |
707 } | 771 } |
708 | 772 |
709 var pattern_obj = []; // record pattern array for regex generator. only contain pattern1 and pattern2 | 773 var pattern_obj = []; // record pattern array for regex generator. only contain pattern1 and pattern2 |
710 | 774 |
711 function genRegexBySelection(tag_item_div, _selection) { | 775 function genRegexBySelection(tag_item_div, _selection) { |
712 var add_gen_regex_button = document.createElement("button"); | 776 var add_gen_regex_button = document.createElement("button"); |
713 $(add_gen_regex_button).id = "addToGenRegex"; | 777 $(add_gen_regex_button).id = "addToGenRegex"; |
714 $(add_gen_regex_button).addClass("btn btn-md"); | 778 $(add_gen_regex_button).addClass("btn btn-md"); |
715 $(add_gen_regex_button).click( function(){ | 779 $(add_gen_regex_button).click( function(){ |
716 // popup for selected words regex gen | 780 // popup for selected words regex gen |
781 /* | |
717 console.log("Debug: "); | 782 console.log("Debug: "); |
718 console.log(_selection); | 783 console.log(_selection); |
719 | 784 */ |
785 | |
720 if (_selection.type == "Range") { | 786 if (_selection.type == "Range") { |
721 // select words, not just click on text | 787 // select words, not just click on text |
722 var anchor_node = _selection.anchorNode; | 788 var anchor_node = _selection.anchorNode; |
723 var focus_node = _selection.focusNode; | 789 var focus_node = _selection.focusNode; |
724 var sibling_node = anchor_node.nextElementSibling; | 790 var sibling_node = anchor_node.nextElementSibling; |
749 | 815 |
750 $(seleted_div).text(text_before+tagged_text+text_after); | 816 $(seleted_div).text(text_before+tagged_text+text_after); |
751 seleted_obj.push({tag:null, txt:text_before}); | 817 seleted_obj.push({tag:null, txt:text_before}); |
752 seleted_obj.push({tag:tag_name, txt:tagged_text}); | 818 seleted_obj.push({tag:tag_name, txt:tagged_text}); |
753 seleted_obj.push({tag:null, txt:text_after}); | 819 seleted_obj.push({tag:null, txt:text_after}); |
754 | 820 /* |
755 | |
756 console.log(text_before); | 821 console.log(text_before); |
757 console.log(tag_name); | 822 console.log(tag_name); |
758 console.log(tagged_text); | 823 console.log(tagged_text); |
759 console.log(text_after); | 824 console.log(text_after); |
825 */ | |
760 } | 826 } |
761 | 827 |
762 | 828 |
763 var generated_regex = ""; | 829 |
830 var generated_regex_plaintext = ""; | |
764 // show generate regex window | 831 // show generate regex window |
765 $('#regex_generator').css("display", "block"); | 832 $('#regex_generator').css("display", "block"); |
766 $("#gen_regex_window_open_id").text("Close Gen Regex"); | 833 $("#gen_regex_window_open_id").text("Close Gen Regex"); |
767 | 834 |
768 //var seleted_text = String(_selection).replace(/^\s+|\s+$/g,''); | 835 //var seleted_text = String(_selection).replace(/^\s+|\s+$/g,''); |
773 pattern_obj.push(seleted_obj); | 840 pattern_obj.push(seleted_obj); |
774 // pattern1.text(seleted_div.text()); | 841 // pattern1.text(seleted_div.text()); |
775 } else if (pattern2.children().length == 0) { | 842 } else if (pattern2.children().length == 0) { |
776 pattern2.append(seleted_div); | 843 pattern2.append(seleted_div); |
777 pattern_obj.push(seleted_obj); | 844 pattern_obj.push(seleted_obj); |
778 //pattern2.text(seleted_div.text()); | 845 |
779 generated_regex = getRegex(pattern_obj); | 846 setSuggestedRegex(pattern_obj); |
847 var generated_regex = getSuggestedRegex(); | |
848 | |
849 // get plaintext from generated_regex obj | |
850 for (var i = 0; i < generated_regex.length; i++) { | |
851 generated_regex_plaintext += generated_regex[i].txt; | |
852 } | |
780 | 853 |
781 } else { | 854 } else { |
782 // pattern1 and pattern2 are already having text | 855 // pattern1 and pattern2 are already having text |
783 pattern1.children().remove(); | 856 pattern1.children().remove(); |
784 pattern1.append(pattern2.children()); | 857 pattern1.append(pattern2.children()); |
786 pattern2.children().remove(); | 859 pattern2.children().remove(); |
787 pattern2.append(seleted_div); | 860 pattern2.append(seleted_div); |
788 | 861 |
789 pattern_obj.shift(); | 862 pattern_obj.shift(); |
790 pattern_obj.push(seleted_obj); | 863 pattern_obj.push(seleted_obj); |
791 | 864 |
792 //pattern1.text(pattern2.text()); | 865 setSuggestedRegex(pattern_obj); |
793 //pattern2.text(seleted_div); | 866 var generated_regex = getSuggestedRegex(); |
794 generated_regex = getRegex(pattern_obj); | 867 |
868 // get plaintext from generated_regex obj | |
869 for (var i = 0; i < generated_regex.length; i++) { | |
870 generated_regex_plaintext += generated_regex[i].txt; | |
871 } | |
872 | |
795 | 873 |
796 } | 874 } |
797 $('#generated_regex').text(generated_regex); | 875 //$('#generated_regex').text(generated_regex); |
876 $('#generated_regex').text(generated_regex_plaintext); | |
798 // --- | 877 // --- |
799 | 878 |
800 $('#regex_generator_error_msg').text(""); | 879 $('#regex_generator_error_msg').text(""); |
801 } else { | 880 } else { |
802 $('#regex_generator_error_msg').text("Note: Not a valid selection for regex generator."); | 881 $('#regex_generator_error_msg').text("Note: Not a valid selection for regex generator."); |