function [Nsq,varargout] = TRG_Square_Ex (A,B,log2L,Nkeep,varargin)
% < Description >
%
% Nsq         = TRG_Square_Ex (A,B,log2L,Nkeep)
% [Nsq,OOval] = TRG_Square_Ex (A,B,log2L,Nkeep,OA,OB)
%
% Compute the squared norm (e.g., < \Psi | \Psi >) per site, of a projected
% entangled-pair state (PEPS) on a square lattice of size L x L with
% periodic boundary condition, by using the tensor renormalization group
% (TRG) method. Here the PEPS is built from a unit cell of two sites,
% represented by local ket tensors A and B, which correspond to two
% bipartition sublattices, respectively. If local operators OA and OB are
% given, this function computes the nearest-neighbour correlator of OA and
% OB, with respect to the PEPS.
%
% < Input >
% A, B : [rank-5 tensors] Local tensors for each sublattice (four bond legs
%       and one physical leg). The leg order is
%       left-up-physical-down-right.
% log2L : [integer] Base-2 logarithm of L, where the lattice is of size L
%       by L.
% Nkeep : [numeric] Maximum bond dimension.
% OA, OB : [rank-2 or rank-3 tensors] (Optional) The operators acting onto
%       nearest-neighbor sites. They must be given together. OA (OB) acts
%       on a site whose ket state is represented by A (B). The 1st legs of
%       OA and OB act on the bra, and the last legs on the ket. If they are
%       rank-3, the 2nd legs of OA and OB are to be contracted with each
%       other.
%
% < Output >
% Nsq : [numeric] The squared norm of the PEPS, per lattice site. The
%       total norm of the PEPS is given by Nsq^(L^2), where L = 2^log2L.
% OOval : [numeric] (Optional) Only for when OA and OB are given. The
%       correlation function of local operators OA and OB acting onto
%       nearest-neighbor sites.
%
% Written by S.Lee (Jul.25,2017)
% Updated by S.Lee (Jul.08,2019): Revised for SoSe 2019.
% Updated by S.Lee (Jul.18,2019): Critical error was found. There should be
%       two uniform tensors, since we start from two ket tensors A and B,
%       for A and B sublattices. Code was largely revised to fix this
%       problem.

tobj = tic2;
isOp = false; % default: no operator -> not compute correlator

% parsing optional input
if nargin > 4
    % OA and OB are only meaningful as a pair; fail early with a clear
    % message instead of an indexing error on varargin{2}
    if nargin < 6
        error('ERR: Operators OA and OB must be given together.');
    end
    isOp = true;
    OA = varargin{1};
    OB = varargin{2};
end

disp(['TERG on a square lattice: size = 2^',sprintf('%g',log2L),' x 2^',sprintf('%g',log2L), ...
    ', Nkeep = ',sprintf('%g',Nkeep)]);

% Permute the legs of A and B, for future convenience. After permutation,
% they would look like:
%
%       |2         |4
%   3   |   1   1  |   3  (Physical legs are the last, i.e., the 5th;
%   --- A -------- B ---     they are hidden for brevity)
%       |          |  
%      4|          |2
%
% Here the legs are ordered in counter-clockwise order.
% The n-th leg of A contracts with the n-th leg of B for n = 1,2,3. This
% correspondence gives the future convenience.
A = permute(A,[5 2 1 4 3]);
B = permute(B,[1 4 5 2 3]);

% make double tensor (transfer operator)
z = 4; % coordination number = number of nearest neighbours

% permute legs to fuse bond legs
ids = [(1:z);z+(1:z)]; % leg permutation: interleave ket and bra bond legs
gA = contract(A,z+1,z+1,conj(A),z+1,z+1,ids(:));
gB = contract(B,z+1,z+1,conj(B),z+1,z+1,ids(:));

% pad with ones so that trailing singleton dimensions, which size() drops,
% are still accounted for
dimA = ones(2,z); dimB = ones(2,z); % bond dimensions
dimA(1:ndims(gA)) = size(gA);
dimB(1:ndims(gB)) = size(gB);

% fuse bond legs by reshaping
gA = reshape(gA,prod(dimA,1)); 
gB = reshape(gB,prod(dimB,1));

% gA and gB will evolve to "uniform tensors", which tile the whole PEPS
% except for the central part which involves local operators O.

if isOp
    if size(OA,3) > 1 % if OA is rank-3
        % permute the legs so that bra - ket - operator
        OA = permute(OA,[1 3 2]);
    end
    if size(OB,3) > 1 % if OB is rank-3
        % permute the legs so that bra - ket - operator
        OB = permute(OB,[1 3 2]);
    end 
    
    % permute legs to fuse bond legs
    ids = 1+[(1:z);z+(1:z)]; % leg permutation (shifted by 1 for the operator leg)
    gOA = contract(A,z+1,z+1,OA,3,2,[z+2 (1:z+1)]); % put the operator leg of OA to the first
    gOA = contract(gOA,z+2,z+2,conj(A),z+1,z+1,[1;ids(:)]);
    gOB = contract(B,z+1,z+1,OB,3,2,[z+2 (1:z+1)]); % put the operator leg of OB to the first
    gOB = contract(gOB,z+2,z+2,conj(B),z+1,z+1,[1;ids(:)]);
    
    % fuse bond legs by reshaping; the operator leg (1st leg of gOA, gOB)
    % is merged into the first fused bond leg
    dimA = size(gOA); dimB = size(gOB);
    dimA2 = ones(2,z); dimB2 = ones(2,z);
    dimA2(1:(numel(dimA)-1)) = dimA(2:end);
    dimB2(1:(numel(dimB)-1)) = dimB(2:end);
    dimA2 = prod(dimA2,1); dimA2(1) = dimA2(1)*dimA(1);
    dimB2 = prod(dimB2,1); dimB2(1) = dimB2(1)*dimB(1);
    
    gOA = reshape(gOA,dimA2);
    gOB = reshape(gOB,dimB2);
    
    Timp = {gOA,gOB;gB,gA}; % "impurity tensors": the central part of the
    % PEPS which involves local operators O. The rest is tiled by the
    % uniform tensor.
%     
%         |2               |4
%  3      |       1 1      |        3
%  --- Timp{1,1} ------ Timp{1,2} ---
%         |4              2|
%  1     4|                |2
%  --- Timp{2,1} ------ Timp{2,2} ---
%         |       3 3      |        1
%        2|               4|

end

% uniform tensors
TA = gA; TB = gB;
% squared norm of the PEPS, per site
Nsq = 1;

disptime('Start');

for it1 = (1:(2*log2L-2)) % until 2*2 tensors remain
    Tnorm = sqrt(norm(TA(:))*norm(TB(:))); % geometric mean of the norms of two uniform tensors
    % T represents 2^(it1-1) number of lattice sites
    Nsq = Nsq*exp(log(Tnorm)/(2^(it1-1))); % the squared norm *per site*
    TA = TA/Tnorm;
    TB = TB/Tnorm;
    [U1,S1,V1] = svdTr(TB,4,[1 4],Nkeep,[]); % B sublattice
    [U2,S2,V2] = svdTr(TA,4,[3 4],Nkeep,[]); % A sublattice
    
    % normalize impurity tensors
    if isOp
        for it2 = (1:numel(Timp))
            Timp{it2} = Timp{it2}/Tnorm;
        end
    end
    
    % NOTE: until the exercise below is completed, S1 and S2 are unused and
    % the singular values are not absorbed into U1, V1, U2, V2.
    % % % % TODO - Exercise (a)  (Start) % % % %
    % take the square root of singular values, and contract the square root
    % with the isometries (U1, V1, ...)
    % B sublattice
    
    
    % A sublattice
    
    
    % % % % TODO - Exercise (a)  (End) % % % %

    % contract the tensors around a plaquette to make the coarse-grained uniform tensor
    TA = contPlaq(U1,U2,V1,V2);
    TB = contPlaq(V1,V2,U1,U2);
    % after each RG step, the whole lattice rotates clockwise by 45 degree
    
    if isOp
        % when local operators to be measured are given, coarse-grain
        % "impurity" tensors also
        
        
        % % % % TODO - Exercise (a)  (Start) % % % %
        for it2 = (1:numel(Timp))
            % take the SVD of Timp{it2}, take the square root of the
            % singular values, and contract the square root with the
            % isometries
            
            % A sublattice
            
            % B sublattice
            
        end
        
        % obtain Timp{m,n} by contracting U1, V1, etc. Use contPlaq for
        % simple implementation.
        
        
        % % % % TODO - Exercise (a)  (End) % % % %
    end
    
    disptime(['#',sprintf('%02i/%02i',[it1,(2*log2L-2)])]);
end

% now we have only four tensors on 2 x 2 lattice; contract exactly
Ttmp1 = contract(TA,4,[1 3],TB,4,[1 3]); % 1st row
Ttmp2 = contract(TB,4,[1 3],TA,4,[1 3]); % 2nd row
res = contract(Ttmp1,4,(1:4),Ttmp2,4,(1:4));
Nsq = Nsq*exp(log(res)/(2^(2*log2L)));

if isOp
    Ttmp1 = contract(Timp{1,1},4,[1 3],Timp{1,2},4,[1 3]); % 1st row
    Ttmp2 = contract(Timp{2,1},4,[1 3],Timp{2,2},4,[1 3]); % 2nd row
    % only the ratio of the contraction results of final small tensor
    % networks matters
    varargout{1} = contract(Ttmp1,4,(1:4),Ttmp2,4,(1:4))/res;
end % this 'end' was missing, so the summary below ran only when isOp was true

% report the result (for both the norm-only and the correlator cases)
strs = ['Squared norm per site = ',sprintf('%.6g',Nsq)];
if isOp
    strs = [strs,', Correlation func. = ',sprintf('%.6g',varargout{1})];
end
disptime(strs);
toc2(tobj,'-v');

end